diff --git a/NAMESPACE b/NAMESPACE index 2458d8280..258b51037 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -51,6 +51,7 @@ export(get_susie_result) export(glmnet_weights) export(harmonize_gwas) export(harmonize_twas) +export(invert_minmax_scaling) export(lasso_weights) export(lbf_to_alpha) export(load_LD_matrix) diff --git a/man/invert_minmax_scaling.Rd b/man/invert_minmax_scaling.Rd new file mode 100644 index 000000000..616feed5c --- /dev/null +++ b/man/invert_minmax_scaling.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/file_utils.R +\name{invert_minmax_scaling} +\alias{invert_minmax_scaling} +\title{Invert min-max [0,2] scaling to recover the original U matrix.} +\usage{ +invert_minmax_scaling(X, u_min, u_max) +} +\arguments{ +\item{X}{Numeric matrix (B x p) of min-max scaled values in [0, 2].} + +\item{u_min}{Numeric vector of per-variant minimum values before scaling.} + +\item{u_max}{Numeric vector of per-variant maximum values before scaling.} +} +\value{ +Matrix of original U values with same dimensions. +} +\description{ +Stochastic genotype data (from rss_ld_sketch) is stored in PLINK2 pgen +format after min-max scaling: U_scaled = 2 * (U - u_min) / (u_max - u_min). +This function exactly inverts that transform using the stored per-variant +u_min and u_max values from the companion .afreq file. +} +\details{ +The recovered U satisfies U'U/B ~ Wishart(B, R)/B, the correct distributional +property for LD-based fine-mapping with dynamic variance tracking. +} diff --git a/src/Makevars.in b/src/Makevars.in index 4ec508c3d..02b6ef269 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -1,4 +1,4 @@ CXX_STD = CXX14 -PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DARMA_64BIT_WORD=1 -DSIMDE_ENABLE_NATIVE_ALIASES +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DARMA_64BIT_WORD=1 PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) \ $(FLIBS) $(LDFLAGS) diff --git a/src/sdpr_mcmc.cpp b/src/sdpr_mcmc.cpp index b0298ebb2..f8c91ea6b 100644 --- a/src/sdpr_mcmc.cpp +++ b/src/sdpr_mcmc.cpp @@ -1,3 +1,29 @@ +// sdpr_mcmc.cpp -- SDPR MCMC sampler using Armadillo +// +// Clean Armadillo port of the original SDPR by Zhou et al. +// (https://github.com/eldronzhou/SDPR), which used GSL + x86 SSE intrinsics. +// +// Translation strategy: +// GSL gsl_blas_dsymv -> arma::symmatu(M) * v +// GSL gsl_blas_dgemv -> A.t() * v +// GSL gsl_blas_dgemm -> A * B +// GSL gsl_blas_dtrsv -> arma::solve(trimatl(L), v) +// GSL gsl_blas_dtrsm -> arma::solve(trimatl(L), A) +// GSL gsl_blas_ddot -> arma::dot(x, y) +// GSL gsl_blas_daxpy -> y += alpha * x +// GSL gsl_linalg_cholesky_decomp1 -> arma::chol(A, "lower") +// GSL gsl_ran_gamma -> std::gamma_distribution +// GSL gsl_ran_beta -> beta_distribution (ratio of gammas) +// GSL gsl_ran_ugaussian -> std::normal_distribution(0,1) +// GSL gsl_rng_uniform -> std::uniform_real_distribution(0,1) +// +// The original SSE intrinsics (log_ps, exp_ps, _mm_max_ps, _mm_hadd_ps) +// in sample_assignment() are replaced with Armadillo vectorized operations +// (arma::log, arma::exp, arma::max, arma::accu) which delegate to +// platform-optimal SIMD (NEON on ARM, SSE/AVX on x86) through the +// underlying BLAS/compiler auto-vectorization. 
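As a sanity check on the GSL-to-Armadillo translation table above, the standalone snippet below (an illustration written for this review, not part of the patch) round-trips the dsymv, Cholesky, and dtrsv mappings on a small symmetric positive definite matrix:

#include <armadillo>
#include <iostream>

int main() {
    arma::arma_rng::set_seed(1);
    arma::mat A = arma::randu<arma::mat>(4, 4);
    arma::mat S = A.t() * A + 4.0 * arma::eye(4, 4);  // symmetric positive definite
    arma::vec v = arma::randu<arma::vec>(4);

    // gsl_blas_dsymv (upper triangle referenced)  <->  arma::symmatu(S) * v
    arma::vec sv = arma::symmatu(S) * v;

    // gsl_linalg_cholesky_decomp1  <->  arma::chol(S, "lower"), so S = L * L.t()
    arma::mat L = arma::chol(S, "lower");

    // gsl_blas_dtrsv(CblasLower, CblasNoTrans, ...)  <->  arma::solve(arma::trimatl(L), v)
    arma::vec x = arma::solve(arma::trimatl(L), v);

    // Both residuals should print as ~0
    std::cout << arma::norm(sv - S * v) << " "
              << arma::norm(L * x - v) << std::endl;
    return 0;
}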
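For orientation, the transform documented in invert_minmax_scaling.Rd above is a per-variant affine map, so its inverse is simply U = X * (u_max - u_min) / 2 + u_min applied column by column. A minimal Armadillo sketch of that inversion (illustrative only; the function name and signature here are hypothetical, and the package's real implementation lives in R/file_utils.R):

#include <armadillo>

// Undo U_scaled = 2 * (U - u_min) / (u_max - u_min), column by column.
// X is B x p with values in [0, 2]; u_min/u_max hold one value per variant (column).
arma::mat invert_minmax_sketch(const arma::mat& X,
                               const arma::rowvec& u_min,
                               const arma::rowvec& u_max) {
    arma::rowvec half_range = (u_max - u_min) / 2.0;
    arma::mat U = X;
    U.each_row() %= half_range;  // rescale each column by (u_max - u_min) / 2
    U.each_row() += u_min;       // shift back by the per-variant minimum
    return U;
}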
+ +#include #include #include #include @@ -5,17 +31,9 @@ #include #include #include -#ifdef __arm64__ -#include "simde/x86/avx512.h" -#else -#include -#endif -#include "sse_mathfun.h" #include "function_pool.h" #include "sdpr_mcmc.h" -using namespace std::chrono; - using std::cout; using std::endl; using std::thread; using std::ref; using std::vector; using std::ofstream; @@ -23,491 +41,543 @@ using std::string; using std::min; #define square(x) ((x)*(x)) +// --------------------------------------------------------------------------- +// sample_sigma2: Sample cluster variances from inverse-gamma posterior +// var_k ~ InvGamma(suff_stats[k]/2 + a0k, sumsq[k]/2 + b0k) +// Original: mcmc.cpp lines 25-40 +// --------------------------------------------------------------------------- void MCMC_state::sample_sigma2() { - std::gamma_distribution dist; - for (size_t i=1; i(a, b); - cluster_var[i] = 1.0 / dist(r); - if (std::isinf(cluster_var[i])) { - cluster_var[i] = 1e5; - std::cerr << "Cluster variance is infinite." << std::endl; - } - else if (cluster_var[i] == 0) { - cluster_var[i] = 1e-10; - std::cerr << "Cluster variance is zero." << std::endl; - } - } + std::gamma_distribution<double> dist; + for (size_t i = 1; i < M; i++) { + double a = suff_stats[i] / 2.0 + a0k; + double b = 1.0 / (sumsq[i] / 2.0 + b0k); + dist = std::gamma_distribution<double>(a, b); + cluster_var[i] = 1.0 / dist(r); + if (std::isinf(cluster_var[i])) { + cluster_var[i] = 1e5; + Rcpp::Rcerr << "Cluster variance is infinite." << std::endl; + } + else if (cluster_var[i] == 0) { + cluster_var[i] = 1e-10; + Rcpp::Rcerr << "Cluster variance is zero." << std::endl; + } + } } -void MCMC_state::calc_b(size_t j, const mcmc_data &dat, const ldmat_data &ldmat_dat) { - size_t start_i = dat.boundary[j].first; - size_t end_i = dat.boundary[j].second; - arma::vec b_j = b.subvec(start_i, end_i-1); - arma::vec beta_j = beta.subvec(start_i, end_i-1); - - arma::vec diag = ldmat_dat.B[j].diag(); - - // diag(B) * beta - b_j = beta_j % diag; - - // eta^2 * (diag(B) * beta) - eta^2 * B * beta - b_j = eta*eta * (b_j - arma::symmatu(ldmat_dat.B[j]) * beta_j); - - // eta^2 * (diag(B) * beta) - eta^2 * B * beta + eta * A^T * beta_mrg - b_j += eta * ldmat_dat.calc_b_tmp[j]; - - // June 2024 - add to ensure that beta is updating - b.subvec(start_i, end_i - 1) = b_j; +// --------------------------------------------------------------------------- +// calc_b: Compute the b vector for block j +// b_j = eta^2 * (diag(B)*beta - B*beta) + eta * A^T * beta_mrg +// Original: mcmc.cpp lines 42-62 +// gsl_blas_dsymv(CblasUpper, -eta*eta, B, beta, eta*eta, b) +// gsl_blas_daxpy(eta, calc_b_tmp, b) +// --------------------------------------------------------------------------- void MCMC_state::calc_b(size_t j, const mcmc_data &dat, + const ldmat_data &ldmat_dat) { + size_t start_i = dat.boundary[j].first; + size_t end_i = dat.boundary[j].second; + + arma::vec beta_j = beta.subvec(start_i, end_i - 1); + arma::vec diag_B = ldmat_dat.B[j].diag(); + + // Original GSL: b = eta^2 * diag(B)*beta; + // b = eta^2*b - eta^2 * B*beta (via dsymv with alpha=-eta^2, beta=eta^2) + // b += eta * calc_b_tmp + arma::vec b_j = eta * eta * (diag_B % beta_j + - arma::symmatu(ldmat_dat.B[j]) * beta_j) + + eta * ldmat_dat.calc_b_tmp[j]; + + b.subvec(start_i, end_i - 1) = b_j; } -void MCMC_state::sample_assignment(size_t j, const mcmc_data &dat, const ldmat_data &ldmat_dat) { - size_t start_i = dat.boundary[j].first; - size_t end_i = dat.boundary[j].second; - - vector > prob(end_i-start_i, vector(M));
- vector > tmp(end_i-start_i, vector(M)); - vector Bjj(end_i-start_i); - vector bj(end_i-start_i); - vector rnd(end_i-start_i); - - float max_elem, log_exp_sum = 0; - - std::uniform_real_distribution unif(0.0, 1.0); - - for (size_t i=0; i= 4 - _v = log_ps(_mm_loadu_ps(&prob[i][k])); - _mm_storeu_ps(&tmp[i][k], _v); - } - - for (; k prob[i][k]) ? max_elem : prob[i][k]; - } - - // SSE version log exp sum - _m = _mm_load1_ps(&max_elem); - _v = exp_ps(_mm_sub_ps(_mm_loadu_ps(&prob[i][0]), _m)); - - k = 4; - for (; k= 1: +// C_k = eta^2 * N * B[i,i] * var_k + 1 +// log P(z_i=k) = -0.5*log(C_k) + log(p_k) + (N*b_i)^2 * var_k / (2*C_k) +// --------------------------------------------------------------------------- +void MCMC_state::sample_assignment(size_t j, const mcmc_data &dat, + const ldmat_data &ldmat_dat) { + size_t start_i = dat.boundary[j].first; + size_t end_i = dat.boundary[j].second; + size_t n_snp_blk = end_i - start_i; + + std::uniform_real_distribution<float> unif(0.0f, 1.0f); + + // N = 1.0 after May 21 2021 (absorbed into A, B in solve_ldmat) + float C = static_cast<float>(eta * eta * N); + + // Pre-convert cluster variances and log-probabilities to float vectors + // for Armadillo vectorized ops (matching original's float precision) + arma::fvec cvar(M); + arma::fvec log_p_fvec(M); + for (size_t k = 0; k < M; k++) { + cvar(k) = static_cast<float>(cluster_var[k]); + log_p_fvec(k) = static_cast<float>(log_p[k]); + } + + for (size_t i = 0; i < n_snp_blk; i++) { + float Bjj_i = static_cast<float>(ldmat_dat.B[j](i, i)); + float bj_i = static_cast<float>(b(start_i + i)); + float rnd_i = unif(r); + + // prob[0] = log_p[0] (null cluster) + // prob[k] for k>=1: see math above + arma::fvec prob(M); + prob(0) = log_p_fvec(0); + + // Vectorized: Ck = C * Bjj * cvar[1..M-1] + 1 + arma::fvec Ck = C * Bjj_i * cvar.subvec(1, M - 1) + 1.0f; + + // prob[k] = -0.5*log(Ck) + log_p[k] + (N*bj)^2 * var_k / (2*Ck) + // Original: tmp[k] = log(prob[k]); prob[k] = -0.5*tmp[k] + log_p[k] + ...
+ prob.subvec(1, M - 1) = -0.5f * arma::log(Ck) + log_p_fvec.subvec(1, M - 1) + + square(N * bj_i) * cvar.subvec(1, M - 1) / (2.0f * Ck); + + // Log-sum-exp for numerical stability (replaces SSE _mm_max_ps + exp_ps + _mm_hadd_ps) + float max_elem = prob.max(); + float log_exp_sum = max_elem + + std::logf(arma::accu(arma::exp(prob - max_elem))); + + // Categorical sampling via inverse CDF + // Original: mcmc.cpp lines 155-163 + cls_assgn[i + start_i] = M - 1; + for (size_t k = 0; k < M - 1; k++) { + rnd_i -= std::expf(prob(k) - log_exp_sum); + if (rnd_i < 0) { + cls_assgn[i + start_i] = k; + break; + } + } + } } +// --------------------------------------------------------------------------- +// update_suffstats: Count SNPs per cluster and sum of squared effects +// Original: mcmc.cpp lines 167-175 +// --------------------------------------------------------------------------- void MCMC_state::update_suffstats() { - std::fill(suff_stats.begin(), suff_stats.end(), 0.0); - std::fill(sumsq.begin(), sumsq.end(), 0.0); - for (size_t i=0; ik} suff_stats[k']) +// Original: mcmc.cpp lines 177-189 +// --------------------------------------------------------------------------- void MCMC_state::sample_V() { - vector a(M-1); - - a[M-2] = suff_stats[M-1]; - for (int i=M-3; i>=0; i--) { - a[i] = suff_stats[i+1] + a[i+1]; - } - - for (size_t i=0; i a(M - 1); + a[M - 2] = suff_stats[M - 1]; + for (int i = M - 3; i >= 0; i--) { + a[i] = suff_stats[i + 1] + a[i + 1]; + } + + for (size_t i = 0; i < M - 1; i++) { + beta_distribution dist(1 + suff_stats[i], alpha + a[i]); + V[i] = dist(r); + } + V[M - 1] = 1; } +// --------------------------------------------------------------------------- +// update_p: Convert stick-breaking fractions V to cluster probabilities p +// p[0] = V[0]; p[k] = V[k] * prod_{j cumprod(M-1); - - cumprod[0] = 1 - V[0]; - - for (size_t i=1; i 0) { - p[M-1] = 1 - sum; - } - else { - p[M-1] = 0; - } - - for (size_t i=0; i cumprod(M - 1); + cumprod[0] = 1 - V[0]; + for (size_t i = 1; i < M - 1; i++) { + cumprod[i] = cumprod[i - 1] * (1 - V[i]); + if (V[i] == 1) { + std::fill(cumprod.begin() + i + 1, cumprod.end(), 0.0); + break; + } + } + + p[0] = V[0]; + for (size_t i = 1; i < M - 1; i++) { + p[i] = cumprod[i - 1] * V[i]; + } + + double sum = std::accumulate(p.begin(), p.end() - 1, 0.0); + p[M - 1] = (1 - sum > 0) ? 
(1 - sum) : 0; + + for (size_t i = 0; i < M; i++) { + log_p[i] = std::logf(static_cast<float>(p[i]) + 1e-40f); + } } +// --------------------------------------------------------------------------- +// sample_alpha: Sample Dirichlet process concentration parameter +// alpha ~ Gamma(0.1 + m - 1, 1/(0.1 - sum(log(1-V)))) +// Original: mcmc.cpp lines 224-237 +// --------------------------------------------------------------------------- void MCMC_state::sample_alpha() { - double sum = 0, m = 0; - for (size_t i=0; i dist(0.1+m-1, 1.0/(0.1-sum)); - alpha = dist(r); + double sum = 0, m = 0; + for (size_t i = 0; i < M; i++) { + if (V[i] != 1) { + sum += std::log(1 - V[i]); + m++; + } + } + if (m == 0) m = 1; + + std::gamma_distribution<double> dist(0.1 + m - 1, 1.0 / (0.1 - sum)); + alpha = dist(r); } -void MCMC_state::sample_beta(size_t j, const mcmc_data &dat, ldmat_data &ldmat_dat) { - size_t start_i = dat.boundary[j].first; - size_t end_i = dat.boundary[j].second; - - vector causal_list; - for (size_t i=start_i; i dist(0.0, sqrt(C)); - double rv = dist(r) + C*N*bj; - beta_j(causal_list[0]-start_i) = rv; - ldmat_dat.num[j] = bj*rv; - ldmat_dat.denom[j] = square(rv)*Bjj; - return; - } - - arma::vec A_vec(causal_list.size()); - arma::vec A_vec2(causal_list.size()); - - arma::mat B(causal_list.size(), causal_list.size()); - - // June 2024 - Update N and remove C for ease of reading - for (size_t i=0; i dist(0.0, 1.0); - for (size_t i=0; i causal_list; + for (size_t i = start_i; i < end_i; i++) { + if (cls_assgn[i] != 0) { + causal_list.push_back(i); + } + } + + // Zero out this block's betas + beta.subvec(start_i, end_i - 1).zeros(); + + if (causal_list.empty()) { + ldmat_dat.num[j] = 0; + ldmat_dat.denom[j] = 0; + return; + } + + // Single causal SNP: closed-form sampling + // Original: mcmc.cpp lines 259-270 + if (causal_list.size() == 1) { + double var_k = cluster_var[cls_assgn[causal_list[0]]]; + double bj = b(causal_list[0]); + double Bjj = ldmat_dat.B[j](causal_list[0] - start_i, + causal_list[0] - start_i); + // C = var_k / (N * var_k * eta^2 * Bjj + 1) + double C_val = var_k / (N * var_k * square(eta) * Bjj + 1.0); + std::normal_distribution<double> dist(0.0, std::sqrt(C_val)); + double rv = dist(r) + C_val * N * bj; + beta(causal_list[0]) = rv; + ldmat_dat.num[j] = bj * rv; + ldmat_dat.denom[j] = square(rv) * Bjj; + return; + } + + // Multiple causal SNPs: multivariate normal sampling via Cholesky + size_t nc = causal_list.size(); + + // A_vec = N * eta * A^T * beta_mrg (restricted to causal indices) + // Original: mcmc.cpp line 279: N*eta*gsl_vector_get(calc_b_tmp, idx) + arma::vec A_vec(nc); + double C_coeff = square(eta) * N; // Original: C = square(eta)*N (line 273) + + // Build precision matrix B_gamma and A_vec + // Original: mcmc.cpp lines 278-289 + arma::mat B_gamma(nc, nc); + for (size_t i = 0; i < nc; i++) { + size_t idx_i = causal_list[i] - start_i; + A_vec(i) = N * eta * ldmat_dat.calc_b_tmp[j](idx_i); + + for (size_t k = 0; k < nc; k++) { + size_t idx_k = causal_list[k] - start_i; + if (i != k) { + // Off-diagonal: eta^2 * N * B[i,k] + B_gamma(i, k) = C_coeff * ldmat_dat.B[j](idx_i, idx_k); + } else { + // Diagonal: eta^2 * N * B[i,i] + 1/var_k + B_gamma(i, k) = C_coeff * ldmat_dat.B[j](idx_i, idx_i) + + 1.0 / cluster_var[cls_assgn[causal_list[i]]]; + } + } + } + + arma::vec A_vec2 = A_vec; // Save for eta computation + + // Sample z ~ N(0, I) + arma::vec beta_c(nc); + std::normal_distribution<double> dist(0.0, 1.0); + for (size_t i = 0; i < nc; i++) { + beta_c(i) = dist(r); + } + + // Cholesky: B_gamma =
L * L^T + // Original: gsl_linalg_cholesky_decomp1(&B.matrix) + arma::mat L = arma::chol(B_gamma, "lower"); + + // mu = L^{-1} * A_vec (forward-solve) + // Original: gsl_blas_dtrsv(CblasLower, CblasNoTrans, CblasNonUnit, &B, A_vec) + A_vec = arma::solve(arma::trimatl(L), A_vec); + + // beta_c = mu + z ~ N(mu, I) + beta_c += A_vec; + + // beta_c = L^{-T} * beta_c ~ N(L^{-T}*mu, (L*L^T)^{-1}) + // Original: gsl_blas_dtrsv(CblasLower, CblasTrans, CblasNonUnit, &B, beta_c) + beta_c = arma::solve(arma::trimatu(L.t()), beta_c); + + // Compute eta-related terms for sample_eta() + // Original: mcmc.cpp lines 312-321 + // Restore diagonal of B_gamma to eta^2 * N * B[i,i] (without 1/var_k) + for (size_t i = 0; i < nc; i++) { + size_t idx_i = causal_list[i] - start_i; + B_gamma(i, i) = C_coeff * ldmat_dat.B[j](idx_i, idx_i); + } + + // num = A_vec2^T * beta_c / eta + ldmat_dat.num[j] = arma::dot(A_vec2, beta_c) / eta; + + // denom = beta_c^T * B_gamma * beta_c / eta^2 + // Original: gsl_blas_dsymv(CblasUpper, 1.0, &B, beta_c, 0, A_vec) + arma::vec tmp = arma::symmatu(B_gamma) * beta_c; + ldmat_dat.denom[j] = arma::dot(beta_c, tmp) / square(eta); + + // Write sampled betas back + for (size_t i = 0; i < nc; i++) { + beta(causal_list[i]) = beta_c(i); + } } +// --------------------------------------------------------------------------- +// compute_h2: Compute heritability h2 = sum_j beta_j^T * R_j * beta_j +// Original: mcmc.cpp lines 331-343 +// --------------------------------------------------------------------------- void MCMC_state::compute_h2(const mcmc_data &dat) { - double h2_tmp = 0; - h2 = 0; - for (size_t j=0; j dist(num_sum/denom_sum, sqrt(1.0/denom_sum)); - eta = dist(r); + double num_sum = std::accumulate(ldmat_dat.num.begin(), + ldmat_dat.num.end(), 0.0); + double denom_sum = std::accumulate(ldmat_dat.denom.begin(), + ldmat_dat.denom.end(), 0.0); + denom_sum += 1e-6; + + std::normal_distribution<double> dist(num_sum / denom_sum, + std::sqrt(1.0 / denom_sum)); + eta = dist(r); } -void solve_ldmat(const mcmc_data &dat, ldmat_data &ldmat_dat, const double a, unsigned sz, int opt_llk) { - for (size_t i=0; i std::unordered_map<std::string, arma::vec> mcmc( - mcmc_data& data, - unsigned sz, - double a = 0.1, - double c = 1.0, - size_t M = 1000, - double a0k = 0.5, - double b0k = 0.5, - int iter = 1000, - int burn = 200, - int thin = 5, - unsigned n_threads = 1, - int opt_llk = 1, - bool verbose = true - ) { - - int n_pst = (iter-burn) / thin; - - ldmat_data ldmat_dat; - - MCMC_state state(data.beta_mrg.size(), M, a0k, b0k, sz); - - for (size_t i=0; iburn) && (j%thin == 0)) { - state.compute_h2(data); - samples.h2 += state.h2*square(state.eta) / n_pst; - samples.beta += state.eta/n_pst * state.beta; - } - - if (verbose && j % 100 == 0) { - state.compute_h2(data); - cout << j << " iter.
h2: " << state.h2*square(state.eta) << " max beta: " << arma::max(state.beta)*state.eta << endl; - } - } - - if (verbose) { - cout << "h2: " << samples.h2 << " max: " << arma::max(samples.beta) << endl; - } - - std::unordered_map results; - results["beta"] = samples.beta; - results["h2"] = arma::vec(1, arma::fill::value(samples.h2)); - - return results; + mcmc_data& data, + unsigned sz, + double a, + double c, + size_t M, + double a0k, + double b0k, + int iter, + int burn, + int thin, + unsigned n_threads, + int opt_llk, + bool verbose + ) { + + int n_pst = (iter - burn) / thin; + + ldmat_data ldmat_dat; + + MCMC_state state(data.beta_mrg.size(), M, a0k, b0k, sz); + + // Deflation correction + for (size_t i = 0; i < data.beta_mrg.size(); i++) { + data.beta_mrg[i] /= c; + } + + MCMC_samples samples(data.beta_mrg.size()); + + solve_ldmat(data, ldmat_dat, a, sz, opt_llk); + state.update_suffstats(); + + Function_pool func_pool(n_threads); + + for (int j = 1; j < iter + 1; j++) { + state.sample_sigma2(); + + for (size_t i = 0; i < data.ref_ld_mat.size(); i++) { + state.calc_b(i, data, ldmat_dat); + } + + // sample_assignment is parallelized across LD blocks + for (size_t i = 0; i < data.ref_ld_mat.size(); i++) { + func_pool.push(std::bind(&MCMC_state::sample_assignment, + &state, i, ref(data), ref(ldmat_dat))); + } + func_pool.waitFinished(); + + state.update_suffstats(); + state.sample_V(); + state.update_p(); + state.sample_alpha(); + + for (size_t i = 0; i < data.ref_ld_mat.size(); i++) { + state.sample_beta(i, data, ldmat_dat); + } + + state.sample_eta(ldmat_dat); + + // Collect posterior samples + if ((j > burn) && (j % thin == 0)) { + state.compute_h2(data); + samples.h2 += state.h2 * square(state.eta) / n_pst; + samples.beta += state.eta / n_pst * state.beta; + } + + if (verbose && j % 100 == 0) { + state.compute_h2(data); + Rcpp::Rcout << j << " iter. h2: " + << state.h2 * square(state.eta) + << " max beta: " + << arma::max(state.beta) * state.eta << endl; + } + } + + if (verbose) { + Rcpp::Rcout << "h2: " << samples.h2 + << " max: " << arma::max(samples.beta) << endl; + } + + std::unordered_map results; + results["beta"] = samples.beta; + results["h2"] = arma::vec(1, arma::fill::value(samples.h2)); + + return results; } diff --git a/src/sdpr_mcmc.h b/src/sdpr_mcmc.h index 1bead8bbe..9de9e533d 100644 --- a/src/sdpr_mcmc.h +++ b/src/sdpr_mcmc.h @@ -148,13 +148,15 @@ MCMC_state(size_t num_snp, size_t max_cluster, \ suff_stats.assign(max_cluster, 0); sumsq.assign(max_cluster, 0.0); V.assign(max_cluster, 0.0); + // Initialize all SNPs to the null cluster (k=0). The original SDPR + // used random initialization (uniform over 0..M-1), but this causes + // the first sample_beta() call to allocate an enormous dense matrix + // (nearly all SNPs are "causal"), crashing with "Mat::init() too large". + // Starting from null is standard MCMC practice and lets the sampler + // discover causal assignments organically. 
cls_assgn.assign(num_snp, 0); std::random_device rd; r.seed(rd()); - std::uniform_int_distribution dist(0, M-1); - for (size_t i=0; i - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_H) -#define SIMDE_ARM_NEON_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/types.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_TYPES_H) -#define SIMDE_ARM_NEON_TYPES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-common.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_COMMON_H) -#define SIMDE_COMMON_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/hedley.h :: */ -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. 
This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) -#if defined(HEDLEY_VERSION) -# undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 16 - -#if defined(HEDLEY_STRINGIFY_EX) -# undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -# undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -# undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(HEDLEY_CONCAT) -# undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) - -#if defined(HEDLEY_CONCAT3_EX) -# undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(HEDLEY_CONCAT3) -# undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(HEDLEY_VERSION_ENCODE) -# undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -# undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -# undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -# undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -# undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -# undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -# undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -# undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if 
defined(HEDLEY_INTEL_VERSION) -# undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -# undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -# undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) -# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -# undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -# undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -# undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -# undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -# undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if 
defined(HEDLEY_EMSCRIPTEN_VERSION) -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_ARM_VERSION) -# undef HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(HEDLEY_ARM_VERSION_CHECK) -# undef HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(HEDLEY_ARM_VERSION) -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IBM_VERSION) -# undef HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(HEDLEY_IBM_VERSION_CHECK) -# undef HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(HEDLEY_IBM_VERSION) -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_VERSION) -# undef HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -# if (__TI_COMPILER_VERSION__ >= 16000000) -# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -# endif -#endif - -#if defined(HEDLEY_TI_VERSION_CHECK) -# undef HEDLEY_TI_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_VERSION) -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION) -# undef HEDLEY_TI_CL2000_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) -# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) -# undef HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL2000_VERSION) -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION) -# undef HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) -# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) 
/ 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION_CHECK) -# undef HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL430_VERSION) -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION) -# undef HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) -# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) -# undef HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_ARMCL_VERSION) -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION) -# undef HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) -# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) -# undef HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL6X_VERSION) -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION) -# undef HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) -# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) -# undef HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL7X_VERSION) -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION) -# undef HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) -# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) -# undef HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CLPRU_VERSION) -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_CRAY_VERSION) -# undef HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) -# if defined(_RELEASE_PATCHLEVEL) -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) -# else -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) -# endif -#endif - -#if defined(HEDLEY_CRAY_VERSION_CHECK) -# undef HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(HEDLEY_CRAY_VERSION) -# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) 
(HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IAR_VERSION) -# undef HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) -# if __VER__ > 1000 -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) -# else -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) -# endif -#endif - -#if defined(HEDLEY_IAR_VERSION_CHECK) -# undef HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(HEDLEY_IAR_VERSION) -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TINYC_VERSION) -# undef HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) -# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) -#endif - -#if defined(HEDLEY_TINYC_VERSION_CHECK) -# undef HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_DMC_VERSION) -# undef HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) -# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if defined(HEDLEY_DMC_VERSION_CHECK) -# undef HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(HEDLEY_DMC_VERSION) -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION) -# undef HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) -# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION_CHECK) -# undef HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(HEDLEY_COMPCERT_VERSION) -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PELLES_VERSION) -# undef HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) -# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if defined(HEDLEY_PELLES_VERSION_CHECK) -# undef HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(HEDLEY_PELLES_VERSION) -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION) -# undef HEDLEY_MCST_LCC_VERSION -#endif -#if defined(__LCC__) && defined(__LCC_MINOR__) -# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) -# undef HEDLEY_MCST_LCC_VERSION_CHECK -#endif -#if defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define 
HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_GCC_VERSION) -# undef HEDLEY_GCC_VERSION -#endif -#if \ - defined(HEDLEY_GNUC_VERSION) && \ - !defined(__clang__) && \ - !defined(HEDLEY_INTEL_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_ARM_VERSION) && \ - !defined(HEDLEY_CRAY_VERSION) && \ - !defined(HEDLEY_TI_VERSION) && \ - !defined(HEDLEY_TI_ARMCL_VERSION) && \ - !defined(HEDLEY_TI_CL430_VERSION) && \ - !defined(HEDLEY_TI_CL2000_VERSION) && \ - !defined(HEDLEY_TI_CL6X_VERSION) && \ - !defined(HEDLEY_TI_CL7X_VERSION) && \ - !defined(HEDLEY_TI_CLPRU_VERSION) && \ - !defined(__COMPCERT__) && \ - !defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION -#endif - -#if defined(HEDLEY_GCC_VERSION_CHECK) -# undef HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(HEDLEY_GCC_VERSION) -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_HAS_ATTRIBUTE) -# undef HEDLEY_HAS_ATTRIBUTE -#endif -#if \ - defined(__has_attribute) && \ - ( \ - (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ - ) -# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if \ - defined(__has_cpp_attribute) && \ - defined(__cplusplus) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) -# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#elif \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && 
defined(__cplusplus) -# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_BUILTIN) -# undef HEDLEY_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) -#else -# define HEDLEY_HAS_BUILTIN(builtin) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_BUILTIN) -# undef HEDLEY_GNUC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_BUILTIN) -# undef HEDLEY_GCC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_FEATURE) -# undef HEDLEY_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) -#else -# define HEDLEY_HAS_FEATURE(feature) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_FEATURE) -# undef HEDLEY_GNUC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_FEATURE) -# undef HEDLEY_GCC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_EXTENSION) -# undef HEDLEY_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) -#else -# define HEDLEY_HAS_EXTENSION(extension) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_EXTENSION) -# undef HEDLEY_GNUC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_EXTENSION) -# undef HEDLEY_GCC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) -#else -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) -# undef 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_WARNING) -# undef HEDLEY_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) -#else -# define HEDLEY_HAS_WARNING(warning) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_WARNING) -# undef HEDLEY_GNUC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_WARNING) -# undef HEDLEY_GCC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - defined(__clang__) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ - HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ - (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) -# define HEDLEY_PRAGMA(value) _Pragma(#value) -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_PRAGMA(value) __pragma(value) -#else -# define HEDLEY_PRAGMA(value) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_PUSH) -# undef HEDLEY_DIAGNOSTIC_PUSH -#endif -#if defined(HEDLEY_DIAGNOSTIC_POP) -# undef HEDLEY_DIAGNOSTIC_POP -#endif -#if defined(__clang__) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) -# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) -#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_PUSH 
_Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#else -# define HEDLEY_DIAGNOSTIC_PUSH -# define HEDLEY_DIAGNOSTIC_POP -#endif - -/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wc++98-compat") -# if HEDLEY_HAS_WARNING("-Wc++17-extensions") -# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# endif -#endif -#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x -#endif - -#if defined(HEDLEY_CONST_CAST) -# undef HEDLEY_CONST_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) -#elif \ - HEDLEY_HAS_WARNING("-Wcast-qual") || \ - HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_REINTERPRET_CAST) -# undef HEDLEY_REINTERPRET_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) -#else -# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_STATIC_CAST) -# undef HEDLEY_STATIC_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) -#else -# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_CPP_CAST) -# undef HEDLEY_CPP_CAST -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wold-style-cast") -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ - ((T) (expr)) \ - HEDLEY_DIAGNOSTIC_POP -# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("diag_suppress=Pe137") \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) -# endif -#else -# define HEDLEY_CPP_CAST(T, expr) (expr) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) -# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif -#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED 
__pragma(warning(disable:1478 1786)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") -#elif 
HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-attributes") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") -#elif \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif -#if HEDLEY_HAS_WARNING("-Wcast-qual") -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif -#if HEDLEY_HAS_WARNING("-Wunused-function") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif - -#if defined(HEDLEY_DEPRECATED) -# undef HEDLEY_DEPRECATED -#endif -#if defined(HEDLEY_DEPRECATED_FOR) -# undef HEDLEY_DEPRECATED_FOR -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - 
HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) -#elif \ - (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) -#elif defined(__cplusplus) && (__cplusplus >= 201402L) -# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) -# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(deprecated) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") -# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") -#else -# define HEDLEY_DEPRECATED(since) -# define HEDLEY_DEPRECATED_FOR(since, replacement) -#endif - -#if defined(HEDLEY_UNAVAILABLE) -# undef HEDLEY_UNAVAILABLE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warning) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) -#else -# define HEDLEY_UNAVAILABLE(available_since) -#endif - -#if defined(HEDLEY_WARN_UNUSED_RESULT) -# undef HEDLEY_WARN_UNUSED_RESULT -#endif -#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) -# undef HEDLEY_WARN_UNUSED_RESULT_MSG -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) 
|| \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) -#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#elif defined(_Check_return_) /* SAL */ -# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ -#else -# define HEDLEY_WARN_UNUSED_RESULT -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) -#endif - -#if defined(HEDLEY_SENTINEL) -# undef HEDLEY_SENTINEL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(sentinel) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) -#else -# define HEDLEY_SENTINEL(position) -#endif - -#if defined(HEDLEY_NO_RETURN) -# undef HEDLEY_NO_RETURN -#endif -#if HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NO_RETURN __noreturn -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -# define HEDLEY_NO_RETURN _Noreturn -#elif defined(__cplusplus) && (__cplusplus >= 201103L) -# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(noreturn) || \ - HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ 
- HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_NO_RETURN _Pragma("does_not_return") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NO_RETURN __attribute((noreturn)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#else -# define HEDLEY_NO_RETURN -#endif - -#if defined(HEDLEY_NO_ESCAPE) -# undef HEDLEY_NO_ESCAPE -#endif -#if HEDLEY_HAS_ATTRIBUTE(noescape) -# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) -#else -# define HEDLEY_NO_ESCAPE -#endif - -#if defined(HEDLEY_UNREACHABLE) -# undef HEDLEY_UNREACHABLE -#endif -#if defined(HEDLEY_UNREACHABLE_RETURN) -# undef HEDLEY_UNREACHABLE_RETURN -#endif -#if defined(HEDLEY_ASSUME) -# undef HEDLEY_ASSUME -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ASSUME(expr) __assume(expr) -#elif HEDLEY_HAS_BUILTIN(__builtin_assume) -# define HEDLEY_ASSUME(expr) __builtin_assume(expr) -#elif \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# if defined(__cplusplus) -# define HEDLEY_ASSUME(expr) std::_nassert(expr) -# else -# define HEDLEY_ASSUME(expr) _nassert(expr) -# endif -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNREACHABLE() __builtin_unreachable() -#elif defined(HEDLEY_ASSUME) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif -#if !defined(HEDLEY_ASSUME) -# if defined(HEDLEY_UNREACHABLE) -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 
1 : (HEDLEY_UNREACHABLE(), 1))) -# else -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) -# endif -#endif -#if defined(HEDLEY_UNREACHABLE) -# if \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) -# else -# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() -# endif -#else -# define HEDLEY_UNREACHABLE_RETURN(value) return (value) -#endif -#if !defined(HEDLEY_UNREACHABLE) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wpedantic") -# pragma clang diagnostic ignored "-Wpedantic" -#endif -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif -#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) -# if defined(__clang__) -# pragma clang diagnostic ignored "-Wvariadic-macros" -# elif defined(HEDLEY_GCC_VERSION) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif -#endif -#if defined(HEDLEY_NON_NULL) -# undef HEDLEY_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) -#else -# define HEDLEY_NON_NULL(...) -#endif -HEDLEY_DIAGNOSTIC_POP - -#if defined(HEDLEY_PRINTF_FORMAT) -# undef HEDLEY_PRINTF_FORMAT -#endif -#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) -#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) -#elif \ - HEDLEY_HAS_ATTRIBUTE(format) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) -#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) -#else -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) -#endif - -#if defined(HEDLEY_CONSTEXPR) -# undef HEDLEY_CONSTEXPR -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) -# endif -#endif -#if !defined(HEDLEY_CONSTEXPR) -# define HEDLEY_CONSTEXPR -#endif - -#if 
defined(HEDLEY_PREDICT) -# undef HEDLEY_PREDICT -#endif -#if defined(HEDLEY_LIKELY) -# undef HEDLEY_LIKELY -#endif -#if defined(HEDLEY_UNLIKELY) -# undef HEDLEY_UNLIKELY -#endif -#if defined(HEDLEY_UNPREDICTABLE) -# undef HEDLEY_UNPREDICTABLE -#endif -#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) -# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) -# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) -# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) -# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) -# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) -#elif \ - (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, expected, probability) \ - (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) -# define HEDLEY_PREDICT_TRUE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ - })) -# define HEDLEY_PREDICT_FALSE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? 
__builtin_expect(!!(expr), 1) : !!(expr))); \ - })) -# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) -# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) -#else -# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) -# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) -# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) -# define HEDLEY_LIKELY(expr) (!!(expr)) -# define HEDLEY_UNLIKELY(expr) (!!(expr)) -#endif -#if !defined(HEDLEY_UNPREDICTABLE) -# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) -#endif - -#if defined(HEDLEY_MALLOC) -# undef HEDLEY_MALLOC -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(malloc) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_MALLOC __attribute__((__malloc__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_MALLOC _Pragma("returns_new_memory") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_MALLOC __declspec(restrict) -#else -# define HEDLEY_MALLOC -#endif - -#if defined(HEDLEY_PURE) -# undef HEDLEY_PURE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(pure) || \ - HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PURE __attribute__((__pure__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_PURE _Pragma("does_not_write_global_data") -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ - ) -# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") -#else -# define HEDLEY_PURE -#endif - -#if defined(HEDLEY_CONST) -# undef HEDLEY_CONST -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(const) || \ - HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - 
HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_CONST __attribute__((__const__)) -#elif \ - HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_CONST _Pragma("no_side_effect") -#else -# define HEDLEY_CONST HEDLEY_PURE -#endif - -#if defined(HEDLEY_RESTRICT) -# undef HEDLEY_RESTRICT -#endif -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) -# define HEDLEY_RESTRICT restrict -#elif \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - defined(__clang__) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RESTRICT __restrict -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) -# define HEDLEY_RESTRICT _Restrict -#else -# define HEDLEY_RESTRICT -#endif - -#if defined(HEDLEY_INLINE) -# undef HEDLEY_INLINE -#endif -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - (defined(__cplusplus) && (__cplusplus >= 199711L)) -# define HEDLEY_INLINE inline -#elif \ - defined(HEDLEY_GCC_VERSION) || \ - HEDLEY_ARM_VERSION_CHECK(6,2,0) -# define HEDLEY_INLINE __inline__ -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_INLINE __inline -#else -# define HEDLEY_INLINE -#endif - -#if defined(HEDLEY_ALWAYS_INLINE) -# undef HEDLEY_ALWAYS_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(always_inline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - 
(HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ALWAYS_INLINE __forceinline -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ - ) -# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") -#else -# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE -#endif - -#if defined(HEDLEY_NEVER_INLINE) -# undef HEDLEY_NEVER_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(noinline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) -# define HEDLEY_NEVER_INLINE _Pragma("noinline") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NEVER_INLINE _Pragma("inline=never") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NEVER_INLINE __attribute((noinline)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#else -# define HEDLEY_NEVER_INLINE -#endif - -#if defined(HEDLEY_PRIVATE) -# undef HEDLEY_PRIVATE -#endif -#if defined(HEDLEY_PUBLIC) -# undef HEDLEY_PUBLIC -#endif -#if defined(HEDLEY_IMPORT) -# undef HEDLEY_IMPORT -#endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC __declspec(dllexport) -# define HEDLEY_IMPORT __declspec(dllimport) -#else -# if \ - HEDLEY_HAS_ATTRIBUTE(visibility) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - 
HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - ( \ - defined(__TI_EABI__) && \ - ( \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ - ) \ - ) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) -# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) -# else -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC -# endif -# define HEDLEY_IMPORT extern -#endif - -#if defined(HEDLEY_NO_THROW) -# undef HEDLEY_NO_THROW -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nothrow) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_THROW __attribute__((__nothrow__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NO_THROW __declspec(nothrow) -#else -# define HEDLEY_NO_THROW -#endif - -#if defined(HEDLEY_FALL_THROUGH) -# undef HEDLEY_FALL_THROUGH -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_FALL_THROUGH -#elif \ - HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) -#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) -#elif defined(__fallthrough) /* SAL */ -# define HEDLEY_FALL_THROUGH __fallthrough -#else -# define HEDLEY_FALL_THROUGH -#endif - -#if defined(HEDLEY_RETURNS_NON_NULL) -# undef HEDLEY_RETURNS_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) -#elif defined(_Ret_notnull_) /* SAL */ -# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ -#else -# define HEDLEY_RETURNS_NON_NULL -#endif - -#if defined(HEDLEY_ARRAY_PARAM) -# undef HEDLEY_ARRAY_PARAM -#endif -#if \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - !defined(__STDC_NO_VLA__) && \ - !defined(__cplusplus) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_ARRAY_PARAM(name) (name) -#else -# define HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(HEDLEY_IS_CONSTANT) -# undef HEDLEY_IS_CONSTANT -#endif -#if defined(HEDLEY_REQUIRE_CONSTEXPR) -# undef HEDLEY_REQUIRE_CONSTEXPR -#endif -/* HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ -#if defined(HEDLEY_IS_CONSTEXPR_) -# undef HEDLEY_IS_CONSTEXPR_ -#endif -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -# if \ - HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) -# endif -# elif \ - ( \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(HEDLEY_SUNPRO_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION)) || \ - (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) -# endif -# elif \ - defined(HEDLEY_GCC_VERSION) || \ - defined(HEDLEY_INTEL_VERSION) || \ - defined(HEDLEY_TINYC_VERSION) || \ - defined(HEDLEY_TI_ARMCL_VERSION) || \ - HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ - defined(HEDLEY_TI_CL2000_VERSION) || \ - defined(HEDLEY_TI_CL6X_VERSION) || \ - defined(HEDLEY_TI_CL7X_VERSION) || \ - defined(HEDLEY_TI_CLPRU_VERSION) || \ - defined(__clang__) -# define HEDLEY_IS_CONSTEXPR_(expr) ( \ - sizeof(void) != \ - sizeof(*( \ - 1 ? \ - ((void*) ((expr) * 0L) ) : \ - ((struct { char v[sizeof(void) * 2]; } *) 1) \ - ) \ - ) \ - ) -# endif -#endif -#if defined(HEDLEY_IS_CONSTEXPR_) -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) -#else -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) (0) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(HEDLEY_BEGIN_C_DECLS) -# undef HEDLEY_BEGIN_C_DECLS -#endif -#if defined(HEDLEY_END_C_DECLS) -# undef HEDLEY_END_C_DECLS -#endif -#if defined(HEDLEY_C_DECL) -# undef HEDLEY_C_DECL -#endif -#if defined(__cplusplus) -# define HEDLEY_BEGIN_C_DECLS extern "C" { -# define HEDLEY_END_C_DECLS } -# define HEDLEY_C_DECL extern "C" -#else -# define HEDLEY_BEGIN_C_DECLS -# define HEDLEY_END_C_DECLS -# define HEDLEY_C_DECL -#endif - -#if defined(HEDLEY_STATIC_ASSERT) -# undef HEDLEY_STATIC_ASSERT -#endif -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) -# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#else -# define HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(HEDLEY_NULL) -# undef HEDLEY_NULL -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) -# elif defined(NULL) -# define HEDLEY_NULL NULL -# else -# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) -# endif -#elif defined(NULL) -# define HEDLEY_NULL NULL -#else -# define HEDLEY_NULL ((void*) 0) -#endif - -#if defined(HEDLEY_MESSAGE) -# undef HEDLEY_MESSAGE -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_MESSAGE(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(message msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_WARNING) -# undef HEDLEY_WARNING -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_WARNING(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(clang warning msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_REQUIRE) -# undef HEDLEY_REQUIRE -#endif -#if defined(HEDLEY_REQUIRE_MSG) -# undef HEDLEY_REQUIRE_MSG -#endif -#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) -# if HEDLEY_HAS_WARNING("-Wgcc-compat") -# define HEDLEY_REQUIRE(expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# define HEDLEY_REQUIRE_MSG(expr,msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) -# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) -# endif -#else -# define HEDLEY_REQUIRE(expr) -# define HEDLEY_REQUIRE_MSG(expr,msg) -#endif - -#if defined(HEDLEY_FLAGS) -# undef HEDLEY_FLAGS -#endif -#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) -# define HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else -# define HEDLEY_FLAGS -#endif - -#if defined(HEDLEY_FLAGS_CAST) -# undef HEDLEY_FLAGS_CAST -#endif -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) -# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)") \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(HEDLEY_EMPTY_BASES) -# undef HEDLEY_EMPTY_BASES -#endif -#if \ - (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else -# define HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. */ - -#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) -# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) -#else -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_BUILTIN) -# undef HEDLEY_CLANG_HAS_BUILTIN -#endif -#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) - -#if defined(HEDLEY_CLANG_HAS_FEATURE) -# undef HEDLEY_CLANG_HAS_FEATURE -#endif -#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) - -#if defined(HEDLEY_CLANG_HAS_EXTENSION) -# undef HEDLEY_CLANG_HAS_EXTENSION -#endif -#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) - -#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_WARNING) -# undef HEDLEY_CLANG_HAS_WARNING -#endif -#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ -/* :: End simde/hedley.h :: */ - -#define SIMDE_VERSION_MAJOR 0 -#define SIMDE_VERSION_MINOR 8 -#define SIMDE_VERSION_MICRO 0 -#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) -// Also update meson.build in the root directory of the repository - -#include -#include - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin 
simde/simde-detect-clang.h :: */ -/* Detect Clang Version - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -/* This file was originally part of SIMDe - * (). You're free to do with it as - * you please, but I do have a few small requests: - * - * * If you make improvements, please submit them back to SIMDe - * (at ) so others can - * benefit from them. - * * Please keep a link to SIMDe intact so people know where to submit - * improvements. - * * If you expose it publicly, please change the SIMDE_ prefix to - * something specific to your project. - * - * The version numbers clang exposes (in the ___clang_major__, - * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. - * Vendors such as Apple will define these values to their version - * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but - * __clang_major__ and __clang_minor__ are defined to 4 and 0 - * respectively, instead of 3 and 1. - * - * The solution is *usually* to use clang's feature detection macros - * () - * to determine if the feature you're interested in is available. This - * generally works well, and it should probably be the first thing you - * try. Unfortunately, it's not possible to check for everything. In - * particular, compiler bugs. - * - * This file just uses the feature checking macros to detect features - * added in specific versions of clang to identify which version of - * clang the compiler is based on. - * - * Right now it only goes back to 3.6, but I'm happy to accept patches - * to go back further. And, of course, newer versions are welcome if - * they're not already present, and if you find a way to detect a point - * release that would be great, too! - */ - -#if !defined(SIMDE_DETECT_CLANG_H) -#define SIMDE_DETECT_CLANG_H 1 - -/* Attempt to detect the upstream clang version number. I usually only - * worry about major version numbers (at least for 4.0+), but if you - * need more resolution I'm happy to accept patches that are able to - * detect minor versions as well. That said, you'll probably have a - * hard time with detection since AFAIK most minor releases don't add - * anything we can detect. Updated based on - * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 - * - would welcome patches/updates there as well. 
- */ - -#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -# if __has_attribute(unsafe_buffer_usage) // no new warnings in 17.0 -# define SIMDE_DETECT_CLANG_VERSION 170000 -# elif __has_attribute(nouwtable) // no new warnings in 16.0 -# define SIMDE_DETECT_CLANG_VERSION 160000 -# elif __has_warning("-Warray-parameter") -# define SIMDE_DETECT_CLANG_VERSION 150000 -# elif __has_warning("-Wbitwise-instead-of-logical") -# define SIMDE_DETECT_CLANG_VERSION 140000 -# elif __has_warning("-Waix-compat") -# define SIMDE_DETECT_CLANG_VERSION 130000 -# elif __has_warning("-Wformat-insufficient-args") -# define SIMDE_DETECT_CLANG_VERSION 120000 -# elif __has_warning("-Wimplicit-const-int-float-conversion") -# define SIMDE_DETECT_CLANG_VERSION 110000 -# elif __has_warning("-Wmisleading-indentation") -# define SIMDE_DETECT_CLANG_VERSION 100000 -# elif defined(__FILE_NAME__) -# define SIMDE_DETECT_CLANG_VERSION 90000 -# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) -# define SIMDE_DETECT_CLANG_VERSION 80000 -// For reasons unknown, Xcode 10.3 (Apple LLVM version 10.0.1) is apparently -// based on Clang 7, but does not support the warning we test. -// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and -// https://trac.macports.org/wiki/XcodeVersionInfo. -# elif __has_warning("-Wc++98-compat-extra-semi") || \ - (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) -# define SIMDE_DETECT_CLANG_VERSION 70000 -# elif __has_warning("-Wpragma-pack") -# define SIMDE_DETECT_CLANG_VERSION 60000 -# elif __has_warning("-Wbitfield-enum-conversion") -# define SIMDE_DETECT_CLANG_VERSION 50000 -# elif __has_attribute(diagnose_if) -# define SIMDE_DETECT_CLANG_VERSION 40000 -# elif __has_warning("-Wcomma") -# define SIMDE_DETECT_CLANG_VERSION 39000 -# elif __has_warning("-Wdouble-promotion") -# define SIMDE_DETECT_CLANG_VERSION 38000 -# elif __has_warning("-Wshift-negative-value") -# define SIMDE_DETECT_CLANG_VERSION 37000 -# elif __has_warning("-Wambiguous-ellipsis") -# define SIMDE_DETECT_CLANG_VERSION 36000 -# else -# define SIMDE_DETECT_CLANG_VERSION 1 -# endif -#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ - -/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty - * straightforward; it returns true if the compiler is a derivative - * of clang >= the specified version. - * - * Since this file is often (primarily?) useful for working around bugs - * it is also helpful to have a macro which returns true if only if the - * compiler is a version of clang *older* than the specified version to - * make it a bit easier to ifdef regions to add code for older versions, - * such as pragmas to disable a specific warning. 
*/ - -#if defined(SIMDE_DETECT_CLANG_VERSION) -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) -#else -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) -#endif - -#endif /* !defined(SIMDE_DETECT_CLANG_H) */ -/* :: End simde/simde-detect-clang.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-arch.h :: */ -/* Architecture detection - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - * Different compilers define different preprocessor macros for the - * same architecture. This is an attempt to provide a single - * interface which is usable on any compiler. - * - * In general, a macro named SIMDE_ARCH_* is defined for each - * architecture the CPU supports. When there are multiple possible - * versions, we try to define the macro to the target version. For - * example, if you want to check for i586+, you could do something - * like: - * - * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) - * ... - * #endif - * - * You could also just check that SIMDE_ARCH_X86 >= 5 without checking - * if it's defined first, but some compilers may emit a warning about - * an undefined macro being used (e.g., GCC with -Wundef). - * - * This was originally created for SIMDe - * (hence the prefix), but this - * header has no dependencies and may be used anywhere. It is - * originally based on information from - * , though it - * has been enhanced with additional information. - * - * If you improve this file, or find a bug, please file the issue at - * . If you copy this into - * your project, even if you change the prefix, please keep the links - * to SIMDe intact so others know where to report issues, submit - * enhancements, and find the latest version. 
*/ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -# if defined(__alpha_ev6__) -# define SIMDE_ARCH_ALPHA 6 -# elif defined(__alpha_ev5__) -# define SIMDE_ARCH_ALPHA 5 -# elif defined(__alpha_ev4__) -# define SIMDE_ARCH_ALPHA 4 -# else -# define SIMDE_ARCH_ALPHA 1 -# endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -# define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -# define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -# if !defined(_M_ARM64EC) -# define SIMDE_ARCH_AMD64 1000 -# endif -#endif - -/* ARM - */ -#if defined(__ARM_ARCH) -# if __ARM_ARCH > 100 -# define SIMDE_ARCH_ARM (__ARM_ARCH) -# else -# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) -# endif -#elif defined(_M_ARM) -# if _M_ARM > 100 -# define SIMDE_ARCH_ARM (_M_ARM) -# else -# define SIMDE_ARCH_ARM (_M_ARM * 100) -# endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_ARM 800 -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -# define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) -#else -# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -# define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -# elif defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -# endif -#endif -#if defined(__ARM_FEATURE_SVE) -# define SIMDE_ARCH_ARM_SVE -#endif -#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA -# define SIMDE_ARCH_ARM_FMA -#endif -#if defined(__ARM_FEATURE_CRYPTO) -# define SIMDE_ARCH_ARM_CRYPTO -#endif -#if defined(__ARM_FEATURE_QRDMX) -# define SIMDE_ARCH_ARM_QRDMX -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -# define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -# define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) -# define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -# define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -# define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -# define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -# define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -# define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -# define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -# define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -# define 
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
*/ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. 
*/ -#if \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ -#endif - -/* This warning needs a lot of work. It is triggered if all you do is - * pass the value to memcpy/__builtin_memcpy, or if you initialize a - * member of the union, even if that member takes up the entire union. - * Last tested with clang-10, hopefully things will improve in the - * future; if clang fixes this I'd love to enable it. */ -#if \ - HEDLEY_HAS_WARNING("-Wconditional-uninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ -#endif - -/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which - * will is false. However, SIMDe uses these operations exclusively - * for things like _mm_cmpeq_ps, for which we really do want to check - * for equality (or inequality). - * - * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro - * which just wraps a check in some code do disable this diagnostic I'd - * be happy to accept it. */ -#if \ - HEDLEY_HAS_WARNING("-Wfloat-equal") || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ -#endif - -/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. - * If Hedley can't find an implementation it will preprocess to - * nothing, which means there will be a trailing semi-colon. */ -#if HEDLEY_HAS_WARNING("-Wextra-semi") - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") -#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ -#endif - -/* We do use a few variadic macros, which technically aren't available - * until C99 and C++11, but every compiler I'm aware of has supported - * them for much longer. That said, usage is isolated to the test - * suite and compilers known to support them. */ -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) - #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#endif - -/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro - * before we can access certain SIMD intrinsics, but this diagnostic - * warns about it being a reserved name. It is a reserved name, but - * it's reserved for the compiler and we are using it to convey - * information to the compiler. - * - * This is also used when enabling native aliases since we don't get to - * choose the macro names. 
*/ -#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#endif - -/* Similar to above; types like simde__m128i are reserved due to the - * double underscore, but we didn't choose them, Intel did. */ -#if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ -#endif - -/* clang 3.8 warns about the packed attribute being unnecessary when - * used in the _mm_loadu_* functions. That *may* be true for version - * 3.8, but for later versions it is crucial in order to make unaligned - * access safe. */ -#if HEDLEY_HAS_WARNING("-Wpacked") - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ -#endif - -/* Triggered when assigning a float to a double implicitly. We use - * explicit casts in SIMDe, this is only used in the test suite. */ -#if HEDLEY_HAS_WARNING("-Wdouble-promotion") - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -/* Several compilers treat conformant array parameters as VLAs. We - * test to make sure we're in C mode (C++ doesn't support CAPs), and - * that the version of the standard supports CAPs. We also reject - * some buggy compilers like MSVC (the logic is in Hedley if you want - * to take a look), but with certain warnings enabled some compilers - * still like to emit a diagnostic. */ -#if HEDLEY_HAS_WARNING("-Wvla") - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ -#endif - -/* If you add an unused attribute to a function and don't use it, clang - * may emit this. 
*/ -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpass-failed") - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpadded") - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ -#endif - -#if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ -#endif - -#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ -#endif - -/* clang will emit this warning when we use C99 extensions whan not in - * C99 mode, even though it does support this. In such cases we check - * the compiler and version first, so we know it's not a problem. */ -#if HEDLEY_HAS_WARNING("-Wc99-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -#endif - -/* Similar problm as above; we rely on some basic C99 support, but clang - * has started warning obut this even in C17 mode with -Weverything. */ -#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ -#endif - -/* https://github.com/simd-everywhere/simde/issues/277 */ -#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ -#endif - -/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS - * to silence, but you have to do that before including anything and - * that would require reordering includes. */ -#if defined(_MSC_VER) - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ -#endif - -/* Some compilers, such as clang, may use `long long` for 64-bit - * integers, but `long long` triggers a diagnostic with - * -Wc++98-compat-pedantic which says 'long long' is incompatible with - * C++98. 
*/ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ - _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ -#endif - -/* Some problem as above */ -#if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ -#endif - -/* emscripten emits this whenever stdin/stdout/stderr is used in a - * macro. */ -#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ -#endif - -/* Clang uses C11 generic selections to implement some AltiVec - * functions, which triggers this diagnostic when not compiling - * in C11 mode */ -#if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ -#endif - -/* Clang sometimes triggers this warning in macros in the AltiVec and - * NEON headers, or due to missing functions. */ -#if HEDLEY_HAS_WARNING("-Wvector-conversion") - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") - /* For NEON, the situation with -Wvector-conversion in clang < 10 is - * bad enough that we just disable the warning altogether. On x86, - * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ - #if \ - (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ - SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ -#endif - -/* Prior to 5.0, clang didn't support disabling diagnostics in - * statement exprs. As a result, some macros we use don't - * properly silence warnings. 
*/ -#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ -#endif - -/* SLEEF triggers this a *lot* in their headers */ -#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#endif - -/* GCC emits this under some circumstances when using __int128 */ -#if HEDLEY_GCC_VERSION_CHECK(4,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -#endif - -/* MSVC doesn't like (__assume(0), code) and will warn about code being - * unreachable, but we want it there because not all compilers - * understand the unreachable macro and will complain if it is missing. - * I'm planning on adding a new macro to Hedley to handle this a bit - * more elegantly, but until then... */ -#if defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) -#elif defined(__clang__) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ -#endif - -/* This is a false positive from GCC in a few places. */ -#if HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif - -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#else - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ -#endif - -/* Some native functions on E2K with instruction set < v6 are declared - * as deprecated due to inefficiency. Still they are more efficient - * than SIMDe implementation. So we're using them, and switching off - * these deprecation warnings. 
*/ -#if defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") -#else -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS -#endif - -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ - HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ - SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ - SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ - SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ - SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ - SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ - SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ - -#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ -/* :: End simde/simde-diagnostic.h :: */ - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SVML) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) - #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) - #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BITALG) - #define SIMDE_X86_AVX512BITALG_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI) - #define SIMDE_X86_AVX512VBMI_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI2) - #define SIMDE_X86_AVX512VBMI2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VNNI) - #define SIMDE_X86_AVX512VNNI_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) - #define SIMDE_X86_AVX5124VNNIW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512CD) - #define SIMDE_X86_AVX512CD_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512DQ) - #define SIMDE_X86_AVX512DQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VL) - #define SIMDE_X86_AVX512VL_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BW) - #define SIMDE_X86_AVX512BW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && !defined(SIMDE_X86_AVX512FP16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512FP16) - #define SIMDE_X86_AVX512FP16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BF16) - #define SIMDE_X86_AVX512BF16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512F) - #define SIMDE_X86_AVX512F_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_NATIVE -#endif - -#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_FMA) - #define SIMDE_X86_FMA_NATIVE - #endif -#endif -#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX2) - #define SIMDE_X86_AVX2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX) - #define SIMDE_X86_AVX_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_XOP) - #define SIMDE_X86_XOP_NATIVE - #endif -#endif -#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_2) - #define SIMDE_X86_SSE4_2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_1) - #define SIMDE_X86_SSE4_1_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSSE3) - #define SIMDE_X86_SSSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE3) - #define SIMDE_X86_SSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_AES_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AES) - #define SIMDE_X86_AES_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE2) - #define SIMDE_X86_SSE2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE) - #define SIMDE_X86_SSE_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_MMX) - #define SIMDE_X86_MMX_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_GFNI) - #define SIMDE_X86_GFNI_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_PCLMUL) - #define SIMDE_X86_PCLMUL_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) - #define SIMDE_X86_VPCLMULQDQ_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_F16C) - #define SIMDE_X86_F16C_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86) && 
(defined(__INTEL_COMPILER) || (HEDLEY_MSVC_VERSION_CHECK(14, 20, 0) && !defined(__clang__))) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(push) - #pragma warning(disable:4799) -#endif - -#if \ - defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) || defined(SIMDE_X86_SVML_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_1_NATIVE) - #include -#elif defined(SIMDE_X86_SSSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE_NATIVE) - #include -#elif defined(SIMDE_X86_MMX_NATIVE) - #include -#endif - -#if defined(SIMDE_X86_XOP_NATIVE) - #if defined(_MSC_VER) - #include - #else - #include - #endif -#endif - -#if defined(SIMDE_X86_AES_NATIVE) - #include -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(pop) -#endif - -#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) - #define SIMDE_ARM_NEON_A64V8_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) - #define SIMDE_ARM_NEON_A32V8_NATIVE - #endif -#endif -#if defined(__ARM_ACLE) - #include -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) - #define SIMDE_ARM_NEON_A32V7_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include - #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - #include - #endif -#endif - -#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_SVE) - #define SIMDE_ARM_SVE_NATIVE - #include - #endif -#endif - -#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_SIMD128) - #define SIMDE_WASM_SIMD128_NATIVE - #endif -#endif - -#if !defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) && !defined(SIMDE_WASM_RELAXED_SIMD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_RELAXED_SIMD) - #define SIMDE_WASM_RELAXED_SIMD_NATIVE - #endif -#endif -#if defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) - #include -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) - #define SIMDE_POWER_ALTIVEC_P9_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) - #define 
SIMDE_POWER_ALTIVEC_P7_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) - #define SIMDE_POWER_ALTIVEC_P7_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_15_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_14_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_13_NATIVE - #endif -#endif - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - /* AltiVec conflicts with lots of stuff. The bool keyword conflicts - * with the bool keyword in C++ and the bool macro in C99+ (defined - * in stdbool.h). The vector keyword conflicts with std::vector in - * C++ if you are `using std;`. - * - * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` - * instead, but altivec.h will unconditionally define - * `vector`/`bool`/`pixel` so we need to work around that. - * - * Unfortunately this means that if your code uses AltiVec directly - * it may break. If this is the case you'll want to define - * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even - * better, port your code to use the double-underscore versions. */ - #if defined(bool) - #undef bool - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #include - - #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #if defined(vector) - #undef vector - #endif - #if defined(pixel) - #undef pixel - #endif - #if defined(bool) - #undef bool - #endif - #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #include - #endif - - /* Use these intsead of vector/pixel/bool in SIMDe. 
*/ - #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T - #define SIMDE_POWER_ALTIVEC_PIXEL __pixel - #define SIMDE_POWER_ALTIVEC_BOOL __bool - - /* Re-define bool if we're using stdbool.h */ - #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #define bool _Bool - #endif -#endif - -#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) - #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_MSA) - #define SIMDE_MIPS_MSA_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_MSA_NATIVE) - #include -#endif - -/* This is used to determine whether or not to fall back on a vector - * function in an earlier ISA extensions, as well as whether - * we expected any attempts at vectorization to be fruitful or if we - * expect to always be running serial code. - * - * Note that, for some architectures (okay, *one* architecture) there - * can be a split where some types are supported for one vector length - * but others only for a shorter length. Therefore, it is possible to - * provide separate values for float/int/double types. */ - -#if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (512) - #elif defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (256) - #elif defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || \ - defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ - defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (128) - #elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) - #endif - - #if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE - #else - #define SIMDE_NATURAL_VECTOR_SIZE (0) - #endif - #endif - - #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif -#endif - -#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) 
((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) - -/* Native aliases */ -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_FMA_NATIVE) - #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VL_NATIVE) - #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) - #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) - #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BW_NATIVE) - #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) - #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) - #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BF16_NATIVE) - #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) - #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) - #define SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512DQ_NATIVE) - #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512CD_NATIVE) - #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512FP16_NATIVE) - #define SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_GFNI_NATIVE) - #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_PCLMUL_NATIVE) - #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) - #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_F16C_NATIVE) - #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AES_NATIVE) - #define 
SIMDE_X86_AES_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SVML_NATIVE) - #define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_SVE_NATIVE) - #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_WASM_SIMD128_NATIVE) - #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES - #endif -#endif - -/* Are floating point values stored using IEEE 754? Knowing - * this at during preprocessing is a bit tricky, mostly because what - * we're curious about is how values are stored and not whether the - * implementation is fully conformant in terms of rounding, NaN - * handling, etc. - * - * For example, if you use -ffast-math or -Ofast on - * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 - * support is not advertised (by defining __STDC_IEC_559__). - * - * However, what we care about is whether it is safe to assume that - * floating point values are stored in IEEE 754 format, in which case - * we can provide faster implementations of some functions. - * - * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- - * so we just assume IEEE 754 for now. There is a test which verifies - * this, if that test fails sowewhere please let us know and we'll add - * an exception for that platform. Meanwhile, you can define - * SIMDE_NO_IEEE754_STORAGE. */ -#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) - #define SIMDE_IEEE754_STORAGE -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_FP16) - #define SIMDE_ARM_NEON_FP16 -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_BF16) - #define SIMDE_ARM_NEON_BF16 -#endif - -#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LASX) - #define SIMDE_LOONGARCH_LASX_NATIVE - #endif -#endif - -#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LSX) - #define SIMDE_LOONGARCH_LSX_NATIVE - #endif -#endif - -#if defined(SIMDE_LOONGARCH_LASX_NATIVE) - #include -#endif -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - #include -#endif - -#endif /* !defined(SIMDE_FEATURES_H) */ -/* :: End simde/simde-features.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-math.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -/* Attempt to find math functions. Functions may be in , - * , compiler built-ins/intrinsics, or platform/architecture - * specific headers. In some cases, especially those not built in to - * libm, we may need to define our own implementations. */ - -#if !defined(SIMDE_MATH_H) -#define SIMDE_MATH_H 1 - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#include -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -/* SLEEF support - * https://sleef.org/ - * - * If you include prior to including SIMDe, SIMDe will use - * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to - * including SIMDe to force the issue. - * - * Note that SLEEF does requires linking to libsleef. - * - * By default, SIMDe will use the 1 ULP functions, but if you use - * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is - * only the case for the simde_math_* functions; for code in other - * SIMDe headers which calls SLEEF directly we may use functions with - * greater error if the API we're implementing is less precise (for - * example, SVML guarantees 4 ULP, so we will generally use the 3.5 - * ULP functions from SLEEF). */ -#if !defined(SIMDE_MATH_SLEEF_DISABLE) - #if defined(__SLEEF_H__) - #define SIMDE_MATH_SLEEF_ENABLE - #endif -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ - #include - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) - #if defined(SLEEF_VERSION_MAJOR) - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #endif -#else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(__has_builtin) - #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) - #define SIMDE_MATH_BUILTIN_LIBM(func) (1) -#else - #define SIMDE_MATH_BUILTIN_LIBM(func) (0) -#endif - -#if defined(HUGE_VAL) - /* Looks like or has already been included. */ - - /* The math.h from libc++ (yes, the C header from the C++ standard - * library) will define an isnan function, but not an isnan macro - * like the C standard requires. So we detect the header guards - * macro libc++ uses. 
*/ - #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) - #define SIMDE_MATH_HAVE_MATH_H - #elif defined(__cplusplus) - #define SIMDE_MATH_HAVE_CMATH - #endif -#elif defined(__has_include) - #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() - #define SIMDE_MATH_HAVE_CMATH - #include - #elif __has_include() - #define SIMDE_MATH_HAVE_MATH_H - #include - #elif !defined(SIMDE_MATH_NO_LIBM) - #define SIMDE_MATH_NO_LIBM - #endif -#elif !defined(SIMDE_MATH_NO_LIBM) - #if defined(__cplusplus) && (__cplusplus >= 201103L) - #define SIMDE_MATH_HAVE_CMATH - HEDLEY_DIAGNOSTIC_PUSH - #if defined(HEDLEY_MSVC_VERSION) - /* VS 14 emits this diagnostic about noexcept being used on a - * function, which we can't do anything about. */ - #pragma warning(disable:4996) - #endif - #include - HEDLEY_DIAGNOSTIC_POP - #else - #define SIMDE_MATH_HAVE_MATH_H - #include - #endif -#endif - -#if !defined(SIMDE_MATH_INFINITY) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_INFINITY (__builtin_inf()) - #elif defined(INFINITY) - #define SIMDE_MATH_INFINITY INFINITY - #endif -#endif - -#if !defined(SIMDE_INFINITYF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inff) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_INFINITYF (__builtin_inff()) - #elif defined(INFINITYF) - #define SIMDE_MATH_INFINITYF INFINITYF - #elif defined(SIMDE_MATH_INFINITY) - #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) - #endif -#endif - -#if !defined(SIMDE_MATH_NAN) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nan) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_NAN (__builtin_nan("")) - #elif defined(NAN) - #define SIMDE_MATH_NAN NAN - #endif -#endif - -#if !defined(SIMDE_NANF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_NANF (__builtin_nanf("")) - #elif defined(NANF) - #define SIMDE_MATH_NANF NANF - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) - #endif -#endif - -#if !defined(SIMDE_MATH_PI) - #if defined(M_PI) - #define SIMDE_MATH_PI M_PI - #else - #define SIMDE_MATH_PI 3.14159265358979323846 - #endif -#endif - -#if !defined(SIMDE_MATH_PIF) - #if defined(M_PI) - #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) - #else - #define SIMDE_MATH_PIF 3.14159265358979323846f - #endif -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180) - #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180F) - #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f -#endif - -#if !defined(SIMDE_MATH_180_OVER_PI) - #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 -#endif - -#if !defined(SIMDE_MATH_180_OVER_PIF) - #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f -#endif - -#if 
!defined(SIMDE_MATH_FLT_MIN) - #if defined(__FLT_MIN__) - #define SIMDE_MATH_FLT_MIN __FLT_MIN__ - #else - #if !defined(FLT_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MIN FLT_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_FLT_MAX) - #if defined(__FLT_MAX__) - #define SIMDE_MATH_FLT_MAX __FLT_MAX__ - #else - #if !defined(FLT_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MAX FLT_MAX - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MIN) - #if defined(__DBL_MIN__) - #define SIMDE_MATH_DBL_MIN __DBL_MIN__ - #else - #if !defined(DBL_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MIN DBL_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MAX) - #if defined(__DBL_MAX__) - #define SIMDE_MATH_DBL_MAX __DBL_MAX__ - #else - #if !defined(DBL_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MAX DBL_MAX - #endif -#endif - -/*** Classification macros from C99 ***/ - -#if !defined(simde_math_isinf) - #if SIMDE_MATH_BUILTIN_LIBM(isinf) - #define simde_math_isinf(v) __builtin_isinf(v) - #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isinf(v) isinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinf(v) std::isinf(v) - #endif -#endif - -#if !defined(simde_math_isinff) - #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define simde_math_isinff(v) __builtin_isinff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinff(v) std::isinf(v) - #elif defined(simde_math_isinf) - #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnan) - #if SIMDE_MATH_BUILTIN_LIBM(isnan) - #define simde_math_isnan(v) __builtin_isnan(v) - #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnan(v) isnan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnan(v) std::isnan(v) - #endif -#endif - -#if !defined(simde_math_isnanf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ - #define simde_math_isnanf(v) __builtin_isnanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnanf(v) std::isnan(v) - #elif defined(simde_math_isnan) - #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnormal) - #if SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormal(v) __builtin_isnormal(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormal(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormal(v) std::isnormal(v) - #endif -#endif - -#if !defined(simde_math_isnormalf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) - #define simde_math_isnormalf(v) __builtin_isnormalf(v) - #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormalf(v) __builtin_isnormal(v) - #elif defined(isnormalf) - #define simde_math_isnormalf(v) isnormalf(v) - #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormalf(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormalf(v) std::isnormal(v) - #elif defined(simde_math_isnormal) - #define simde_math_isnormalf(v) 
simde_math_isnormal(v) - #endif -#endif - -#if !defined(simde_math_issubnormalf) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) - #endif -#endif - -#if !defined(simde_math_issubnormal) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormal(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) - #endif -#endif - -#if defined(FP_NAN) - #define SIMDE_MATH_FP_NAN FP_NAN -#else - #define SIMDE_MATH_FP_NAN 0 -#endif -#if defined(FP_INFINITE) - #define SIMDE_MATH_FP_INFINITE FP_INFINITE -#else - #define SIMDE_MATH_FP_INFINITE 1 -#endif -#if defined(FP_ZERO) - #define SIMDE_MATH_FP_ZERO FP_ZERO -#else - #define SIMDE_MATH_FP_ZERO 2 -#endif -#if defined(FP_SUBNORMAL) - #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL -#else - #define SIMDE_MATH_FP_SUBNORMAL 3 -#endif -#if defined(FP_NORMAL) - #define SIMDE_MATH_FP_NORMAL FP_NORMAL -#else - #define SIMDE_MATH_FP_NORMAL 4 -#endif - -static HEDLEY_INLINE -int -simde_math_fpclassifyf(float v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0f) ? SIMDE_MATH_FP_ZERO : - simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -static HEDLEY_INLINE -int -simde_math_fpclassify(double v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0) ? SIMDE_MATH_FP_ZERO : - simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -#define SIMDE_MATH_FP_QNAN 0x01 -#define SIMDE_MATH_FP_PZERO 0x02 -#define SIMDE_MATH_FP_NZERO 0x04 -#define SIMDE_MATH_FP_PINF 0x08 -#define SIMDE_MATH_FP_NINF 0x10 -#define SIMDE_MATH_FP_DENORMAL 0x20 -#define SIMDE_MATH_FP_NEGATIVE 0x40 -#define SIMDE_MATH_FP_SNAN 0x80 - -static HEDLEY_INLINE -uint8_t -simde_math_fpclassf(float v, const int imm8) { - union { - float f; - uint32_t u; - } fu; - fu.f = v; - uint32_t bits = fu.u; - uint8_t NegNum = (bits >> 31) & 1; - uint32_t const ExpMask = 0x3F800000; // [30:23] - uint32_t const MantMask = 0x007FFFFF; // [22:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 22) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -static HEDLEY_INLINE -uint8_t -simde_math_fpclass(double v, const int imm8) { - union { - double d; - uint64_t u; - } du; - du.d = v; - uint64_t bits = du.u; - uint8_t NegNum = (bits >> 63) & 1; - uint64_t const ExpMask = 0x3FF0000000000000; // [62:52] - uint64_t const MantMask = 0x000FFFFFFFFFFFFF; // [51:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 51) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -/*** Manipulation functions ***/ - -#if !defined(simde_math_nextafter) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafter(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafter(x, y) 
nextafter(x, y) - #endif -#endif - -#if !defined(simde_math_nextafterf) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafterf(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafterf(x, y) nextafterf(x, y) - #endif -#endif - -/*** Functions from C99 ***/ - -#if !defined(simde_math_abs) - #if SIMDE_MATH_BUILTIN_LIBM(abs) - #define simde_math_abs(v) __builtin_abs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_abs(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_abs(v) abs(v) - #endif -#endif - -#if !defined(simde_math_labs) - #if SIMDE_MATH_BUILTIN_LIBM(labs) - #define simde_math_labs(v) __builtin_labs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_labs(v) std::labs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_labs(v) labs(v) - #endif -#endif - -#if !defined(simde_math_llabs) - #if SIMDE_MATH_BUILTIN_LIBM(llabs) - #define simde_math_llabs(v) __builtin_llabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_llabs(v) std::llabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_llabs(v) llabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_acos) - #if SIMDE_MATH_BUILTIN_LIBM(acos) - #define simde_math_acos(v) __builtin_acos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acos(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acos(v) acos(v) - #endif -#endif - -#if !defined(simde_math_acosf) - #if SIMDE_MATH_BUILTIN_LIBM(acosf) - #define simde_math_acosf(v) __builtin_acosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosf(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosf(v) acosf(v) - #endif -#endif - -#if !defined(simde_math_acosh) - #if SIMDE_MATH_BUILTIN_LIBM(acosh) - #define simde_math_acosh(v) __builtin_acosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosh(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosh(v) acosh(v) - #endif -#endif - -#if !defined(simde_math_acoshf) - #if SIMDE_MATH_BUILTIN_LIBM(acoshf) - #define simde_math_acoshf(v) __builtin_acoshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acoshf(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acoshf(v) acoshf(v) - #endif -#endif - -#if !defined(simde_math_asin) - #if SIMDE_MATH_BUILTIN_LIBM(asin) - #define simde_math_asin(v) __builtin_asin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asin(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asin(v) asin(v) - #endif -#endif - -#if !defined(simde_math_asinf) - #if SIMDE_MATH_BUILTIN_LIBM(asinf) - #define simde_math_asinf(v) __builtin_asinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinf(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinf(v) asinf(v) - #endif -#endif - -#if 
!defined(simde_math_asinh) - #if SIMDE_MATH_BUILTIN_LIBM(asinh) - #define simde_math_asinh(v) __builtin_asinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinh(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinh(v) asinh(v) - #endif -#endif - -#if !defined(simde_math_asinhf) - #if SIMDE_MATH_BUILTIN_LIBM(asinhf) - #define simde_math_asinhf(v) __builtin_asinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinhf(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinhf(v) asinhf(v) - #endif -#endif - -#if !defined(simde_math_atan) - #if SIMDE_MATH_BUILTIN_LIBM(atan) - #define simde_math_atan(v) __builtin_atan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan(v) atan(v) - #endif -#endif - -#if !defined(simde_math_atan2) - #if SIMDE_MATH_BUILTIN_LIBM(atan2) - #define simde_math_atan2(y, x) __builtin_atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2(y, x) atan2(y, x) - #endif -#endif - -#if !defined(simde_math_atan2f) - #if SIMDE_MATH_BUILTIN_LIBM(atan2f) - #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2f(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2f(y, x) atan2f(y, x) - #endif -#endif - -#if !defined(simde_math_atanf) - #if SIMDE_MATH_BUILTIN_LIBM(atanf) - #define simde_math_atanf(v) __builtin_atanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanf(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanf(v) atanf(v) - #endif -#endif - -#if !defined(simde_math_atanh) - #if SIMDE_MATH_BUILTIN_LIBM(atanh) - #define simde_math_atanh(v) __builtin_atanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanh(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanh(v) atanh(v) - #endif -#endif - -#if !defined(simde_math_atanhf) - #if SIMDE_MATH_BUILTIN_LIBM(atanhf) - #define simde_math_atanhf(v) __builtin_atanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanhf(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanhf(v) atanhf(v) - #endif -#endif - -#if !defined(simde_math_cbrt) - #if SIMDE_MATH_BUILTIN_LIBM(cbrt) - #define simde_math_cbrt(v) __builtin_cbrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrt(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrt(v) cbrt(v) - #endif -#endif - -#if !defined(simde_math_cbrtf) - #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) - #define simde_math_cbrtf(v) __builtin_cbrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrtf(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrtf(v) cbrtf(v) - #endif -#endif - -#if !defined(simde_math_ceil) - #if SIMDE_MATH_BUILTIN_LIBM(ceil) - #define simde_math_ceil(v) __builtin_ceil(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceil(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_ceil(v) ceil(v) - #endif -#endif - -#if !defined(simde_math_ceilf) - #if SIMDE_MATH_BUILTIN_LIBM(ceilf) - #define simde_math_ceilf(v) __builtin_ceilf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceilf(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) 
- #define simde_math_ceilf(v) ceilf(v) - #endif -#endif - -#if !defined(simde_math_copysign) - #if SIMDE_MATH_BUILTIN_LIBM(copysign) - #define simde_math_copysign(x, y) __builtin_copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysign(x, y) std::copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysign(x, y) copysign(x, y) - #endif -#endif - -#if !defined(simde_math_copysignf) - #if SIMDE_MATH_BUILTIN_LIBM(copysignf) - #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysignf(x, y) std::copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysignf(x, y) copysignf(x, y) - #endif -#endif - -#if !defined(simde_math_signbit) - #if SIMDE_MATH_BUILTIN_LIBM(signbit) - #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - #define simde_math_signbit(x) __builtin_signbit(x) - #else - #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) - #endif - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_signbit(x) std::signbit(x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_signbit(x) signbit(x) - #endif -#endif - -#if !defined(simde_math_cos) - #if SIMDE_MATH_BUILTIN_LIBM(cos) - #define simde_math_cos(v) __builtin_cos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cos(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cos(v) cos(v) - #endif -#endif - -#if !defined(simde_math_cosf) - #if defined(SIMDE_MATH_SLEEF_ENABLE) - #if SIMDE_ACCURACY_PREFERENCE < 1 - #define simde_math_cosf(v) Sleef_cosf_u35(v) - #else - #define simde_math_cosf(v) Sleef_cosf_u10(v) - #endif - #elif SIMDE_MATH_BUILTIN_LIBM(cosf) - #define simde_math_cosf(v) __builtin_cosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosf(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosf(v) cosf(v) - #endif -#endif - -#if !defined(simde_math_cosh) - #if SIMDE_MATH_BUILTIN_LIBM(cosh) - #define simde_math_cosh(v) __builtin_cosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosh(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosh(v) cosh(v) - #endif -#endif - -#if !defined(simde_math_coshf) - #if SIMDE_MATH_BUILTIN_LIBM(coshf) - #define simde_math_coshf(v) __builtin_coshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_coshf(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_coshf(v) coshf(v) - #endif -#endif - -#if !defined(simde_math_erf) - #if SIMDE_MATH_BUILTIN_LIBM(erf) - #define simde_math_erf(v) __builtin_erf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erf(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erf(v) erf(v) - #endif -#endif - -#if !defined(simde_math_erff) - #if SIMDE_MATH_BUILTIN_LIBM(erff) - #define simde_math_erff(v) __builtin_erff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erff(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erff(v) erff(v) - #endif -#endif - -#if !defined(simde_math_erfc) - #if SIMDE_MATH_BUILTIN_LIBM(erfc) - #define simde_math_erfc(v) __builtin_erfc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfc(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfc(v) erfc(v) - #endif -#endif - -#if !defined(simde_math_erfcf) - #if SIMDE_MATH_BUILTIN_LIBM(erfcf) - #define simde_math_erfcf(v) 
__builtin_erfcf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfcf(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfcf(v) erfcf(v) - #endif -#endif - -#if !defined(simde_math_exp) - #if SIMDE_MATH_BUILTIN_LIBM(exp) - #define simde_math_exp(v) __builtin_exp(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp(v) exp(v) - #endif -#endif - -#if !defined(simde_math_expf) - #if SIMDE_MATH_BUILTIN_LIBM(expf) - #define simde_math_expf(v) __builtin_expf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expf(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expf(v) expf(v) - #endif -#endif - -#if !defined(simde_math_expm1) - #if SIMDE_MATH_BUILTIN_LIBM(expm1) - #define simde_math_expm1(v) __builtin_expm1(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1(v) expm1(v) - #endif -#endif - -#if !defined(simde_math_expm1f) - #if SIMDE_MATH_BUILTIN_LIBM(expm1f) - #define simde_math_expm1f(v) __builtin_expm1f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1f(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1f(v) expm1f(v) - #endif -#endif - -#if !defined(simde_math_exp2) - #if SIMDE_MATH_BUILTIN_LIBM(exp2) - #define simde_math_exp2(v) __builtin_exp2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2(v) exp2(v) - #endif -#endif - -#if !defined(simde_math_exp2f) - #if SIMDE_MATH_BUILTIN_LIBM(exp2f) - #define simde_math_exp2f(v) __builtin_exp2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2f(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2f(v) exp2f(v) - #endif -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10(v) __builtin_exp10(v) -#else -# define simde_math_exp10(v) pow(10.0, (v)) -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10f(v) __builtin_exp10f(v) -#else -# define simde_math_exp10f(v) powf(10.0f, (v)) -#endif - -#if !defined(simde_math_fabs) - #if SIMDE_MATH_BUILTIN_LIBM(fabs) - #define simde_math_fabs(v) __builtin_fabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabs(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabs(v) fabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_floor) - #if SIMDE_MATH_BUILTIN_LIBM(floor) - #define simde_math_floor(v) __builtin_floor(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floor(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floor(v) floor(v) - #endif -#endif - -#if !defined(simde_math_floorf) - #if SIMDE_MATH_BUILTIN_LIBM(floorf) - #define simde_math_floorf(v) __builtin_floorf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floorf(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floorf(v) floorf(v) - #endif -#endif - -#if 
!defined(simde_math_fma) - #if SIMDE_MATH_BUILTIN_LIBM(fma) - #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fma(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fma(x, y, z) fma(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmaf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaf) - #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaf(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaf(x, y, z) fmaf(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmax) - #if SIMDE_MATH_BUILTIN_LIBM(fmax) - #define simde_math_fmax(x, y) __builtin_fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmax(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmax(x, y) fmax(x, y) - #endif -#endif - -#if !defined(simde_math_fmaxf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) - #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaxf(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaxf(x, y) fmaxf(x, y) - #endif -#endif - -#if !defined(simde_math_hypot) - #if SIMDE_MATH_BUILTIN_LIBM(hypot) - #define simde_math_hypot(y, x) __builtin_hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypot(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypot(y, x) hypot(y, x) - #endif -#endif - -#if !defined(simde_math_hypotf) - #if SIMDE_MATH_BUILTIN_LIBM(hypotf) - #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypotf(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypotf(y, x) hypotf(y, x) - #endif -#endif - -#if !defined(simde_math_log) - #if SIMDE_MATH_BUILTIN_LIBM(log) - #define simde_math_log(v) __builtin_log(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log(v) log(v) - #endif -#endif - -#if !defined(simde_math_logf) - #if SIMDE_MATH_BUILTIN_LIBM(logf) - #define simde_math_logf(v) __builtin_logf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logf(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logf(v) logf(v) - #endif -#endif - -#if !defined(simde_math_logb) - #if SIMDE_MATH_BUILTIN_LIBM(logb) - #define simde_math_logb(v) __builtin_logb(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logb(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logb(v) logb(v) - #endif -#endif - -#if !defined(simde_math_logbf) - #if SIMDE_MATH_BUILTIN_LIBM(logbf) - #define simde_math_logbf(v) __builtin_logbf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logbf(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logbf(v) logbf(v) - #endif -#endif - -#if !defined(simde_math_log1p) - #if SIMDE_MATH_BUILTIN_LIBM(log1p) - #define simde_math_log1p(v) __builtin_log1p(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log1p(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1p(v) log1p(v) - #endif -#endif - -#if !defined(simde_math_log1pf) - #if SIMDE_MATH_BUILTIN_LIBM(log1pf) - #define simde_math_log1pf(v) __builtin_log1pf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define 
simde_math_log1pf(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1pf(v) log1pf(v) - #endif -#endif - -#if !defined(simde_math_log2) - #if SIMDE_MATH_BUILTIN_LIBM(log2) - #define simde_math_log2(v) __builtin_log2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2(v) log2(v) - #endif -#endif - -#if !defined(simde_math_log2f) - #if SIMDE_MATH_BUILTIN_LIBM(log2f) - #define simde_math_log2f(v) __builtin_log2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2f(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2f(v) log2f(v) - #endif -#endif - -#if !defined(simde_math_log10) - #if SIMDE_MATH_BUILTIN_LIBM(log10) - #define simde_math_log10(v) __builtin_log10(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10(v) log10(v) - #endif -#endif - -#if !defined(simde_math_log10f) - #if SIMDE_MATH_BUILTIN_LIBM(log10f) - #define simde_math_log10f(v) __builtin_log10f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10f(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10f(v) log10f(v) - #endif -#endif - -#if !defined(simde_math_modf) - #if SIMDE_MATH_BUILTIN_LIBM(modf) - #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modf(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modf(x, iptr) modf(x, iptr) - #endif -#endif - -#if !defined(simde_math_modff) - #if SIMDE_MATH_BUILTIN_LIBM(modff) - #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modff(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modff(x, iptr) modff(x, iptr) - #endif -#endif - -#if !defined(simde_math_nearbyint) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) - #define simde_math_nearbyint(v) __builtin_nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyint(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyint(v) nearbyint(v) - #endif -#endif - -#if !defined(simde_math_nearbyintf) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) - #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyintf(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyintf(v) nearbyintf(v) - #endif -#endif - -#if !defined(simde_math_pow) - #if SIMDE_MATH_BUILTIN_LIBM(pow) - #define simde_math_pow(y, x) __builtin_pow(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_pow(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_pow(y, x) pow(y, x) - #endif -#endif - -#if !defined(simde_math_powf) - #if SIMDE_MATH_BUILTIN_LIBM(powf) - #define simde_math_powf(y, x) __builtin_powf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_powf(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_powf(y, x) powf(y, x) - #endif -#endif - -#if !defined(simde_math_rint) - #if SIMDE_MATH_BUILTIN_LIBM(rint) - #define simde_math_rint(v) __builtin_rint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rint(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rint(v) rint(v) - #endif 
-#endif - -#if !defined(simde_math_rintf) - #if SIMDE_MATH_BUILTIN_LIBM(rintf) - #define simde_math_rintf(v) __builtin_rintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rintf(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rintf(v) rintf(v) - #endif -#endif - -#if !defined(simde_math_round) - #if SIMDE_MATH_BUILTIN_LIBM(round) - #define simde_math_round(v) __builtin_round(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_round(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_round(v) round(v) - #endif -#endif - -#if !defined(simde_math_roundf) - #if SIMDE_MATH_BUILTIN_LIBM(roundf) - #define simde_math_roundf(v) __builtin_roundf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_roundf(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_roundf(v) roundf(v) - #endif -#endif - -#if !defined(simde_math_roundeven) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundeven(v) __builtin_roundeven(v) - #elif defined(simde_math_round) && defined(simde_math_fabs) - static HEDLEY_INLINE - double - simde_math_roundeven(double v) { - double rounded = simde_math_round(v); - double diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundeven simde_math_roundeven - #endif -#endif - -#if !defined(simde_math_roundevenf) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundevenf(v) __builtin_roundevenf(v) - #elif defined(simde_math_roundf) && defined(simde_math_fabsf) - static HEDLEY_INLINE - float - simde_math_roundevenf(float v) { - float rounded = simde_math_roundf(v); - float diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundevenf simde_math_roundevenf - #endif -#endif - -#if !defined(simde_math_sin) - #if SIMDE_MATH_BUILTIN_LIBM(sin) - #define simde_math_sin(v) __builtin_sin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sin(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sin(v) sin(v) - #endif -#endif - -#if !defined(simde_math_sinf) - #if SIMDE_MATH_BUILTIN_LIBM(sinf) - #define simde_math_sinf(v) __builtin_sinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinf(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinf(v) sinf(v) - #endif -#endif - -#if !defined(simde_math_sinh) - #if SIMDE_MATH_BUILTIN_LIBM(sinh) - #define simde_math_sinh(v) __builtin_sinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinh(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinh(v) sinh(v) - #endif -#endif - -#if !defined(simde_math_sinhf) - #if SIMDE_MATH_BUILTIN_LIBM(sinhf) - #define simde_math_sinhf(v) __builtin_sinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinhf(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinhf(v) sinhf(v) - #endif -#endif - -#if !defined(simde_math_sqrt) - #if SIMDE_MATH_BUILTIN_LIBM(sqrt) - #define simde_math_sqrt(v) __builtin_sqrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrt(v) 
std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrt(v) sqrt(v) - #endif -#endif - -#if !defined(simde_math_sqrtf) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) - #define simde_math_sqrtf(v) __builtin_sqrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtf(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtf(v) sqrtf(v) - #endif -#endif - -#if !defined(simde_math_sqrtl) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtl) - #define simde_math_sqrtl(v) __builtin_sqrtl(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtl(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtl(v) sqrtl(v) - #endif -#endif - -#if !defined(simde_math_tan) - #if SIMDE_MATH_BUILTIN_LIBM(tan) - #define simde_math_tan(v) __builtin_tan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tan(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tan(v) tan(v) - #endif -#endif - -#if !defined(simde_math_tanf) - #if SIMDE_MATH_BUILTIN_LIBM(tanf) - #define simde_math_tanf(v) __builtin_tanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanf(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanf(v) tanf(v) - #endif -#endif - -#if !defined(simde_math_tanh) - #if SIMDE_MATH_BUILTIN_LIBM(tanh) - #define simde_math_tanh(v) __builtin_tanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanh(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanh(v) tanh(v) - #endif -#endif - -#if !defined(simde_math_tanhf) - #if SIMDE_MATH_BUILTIN_LIBM(tanhf) - #define simde_math_tanhf(v) __builtin_tanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanhf(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanhf(v) tanhf(v) - #endif -#endif - -#if !defined(simde_math_trunc) - #if SIMDE_MATH_BUILTIN_LIBM(trunc) - #define simde_math_trunc(v) __builtin_trunc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_trunc(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_trunc(v) trunc(v) - #endif -#endif - -#if !defined(simde_math_truncf) - #if SIMDE_MATH_BUILTIN_LIBM(truncf) - #define simde_math_truncf(v) __builtin_truncf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_truncf(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_truncf(v) truncf(v) - #endif -#endif - -/*** Comparison macros (which don't raise invalid errors) ***/ - -#if defined(isunordered) - #define simde_math_isunordered(x, y) isunordered(x, y) -#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) - #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) -#else - static HEDLEY_INLINE - int simde_math_isunordered(double x, double y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunordered simde_math_isunordered - - static HEDLEY_INLINE - int simde_math_isunorderedf(float x, float y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunorderedf simde_math_isunorderedf -#endif -#if !defined(simde_math_isunorderedf) - #define simde_math_isunorderedf simde_math_isunordered -#endif - -/*** Additional functions not in libm ***/ - -#if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) - static HEDLEY_INLINE - double - simde_math_cdfnorm(double x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const double a1 = 0.254829592; - static const double a2 = -0.284496736; 
- static const double a3 = 1.421413741; - static const double a4 = -1.453152027; - static const double a5 = 1.061405429; - static const double p = 0.3275911; - - const int sign = x < 0; - x = simde_math_fabs(x) / simde_math_sqrt(2.0); - - /* A&S formula 7.1.26 */ - double t = 1.0 / (1.0 + p * x); - double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); - - return 0.5 * (1.0 + (sign ? -y : y)); - } - #define simde_math_cdfnorm simde_math_cdfnorm -#endif - -#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) - static HEDLEY_INLINE - float - simde_math_cdfnormf(float x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const float a1 = 0.254829592f; - static const float a2 = -0.284496736f; - static const float a3 = 1.421413741f; - static const float a4 = -1.453152027f; - static const float a5 = 1.061405429f; - static const float p = 0.3275911f; - - const int sign = x < 0; - x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); - - /* A&S formula 7.1.26 */ - float t = 1.0f / (1.0f + p * x); - float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); - - return 0.5f * (1.0f + (sign ? -y : y)); - } - #define simde_math_cdfnormf simde_math_cdfnormf -#endif - -#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) - /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ - static HEDLEY_INLINE - double - simde_math_cdfnorminv(double p) { - static const double a[6] = { - -3.969683028665376e+01, - 2.209460984245205e+02, - -2.759285104469687e+02, - 1.383577518672690e+02, - -3.066479806614716e+01, - 2.506628277459239e+00 - }; - - static const double b[5] = { - -5.447609879822406e+01, - 1.615858368580409e+02, - -1.556989798598866e+02, - 6.680131188771972e+01, - -1.328068155288572e+01 - }; - - static const double c[6] = { - -7.784894002430293e-03, - -3.223964580411365e-01, - -2.400758277161838e+00, - -2.549732539343734e+00, - 4.374664141464968e+00, - 2.938163982698783e+00 - }; - - static const double d[4] = { - 7.784695709041462e-03, - 3.224671290700398e-01, - 2.445134137142996e+00, - 3.754408661907416e+00 - }; - - static const double low = 0.02425; - static const double high = 0.97575; - double q, r; - - if (p < 0 || p > 1) { - return 0.0; - } else if (p == 0) { - return -SIMDE_MATH_INFINITY; - } else if (p == 1) { - return SIMDE_MATH_INFINITY; - } else if (p < low) { - q = simde_math_sqrt(-2.0 * simde_math_log(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } -} -#define simde_math_cdfnorminv simde_math_cdfnorminv -#endif - -#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_cdfnorminvf(float p) { - static const float a[6] = { - -3.969683028665376e+01f, - 2.209460984245205e+02f, - -2.759285104469687e+02f, - 1.383577518672690e+02f, - -3.066479806614716e+01f, - 
2.506628277459239e+00f - }; - static const float b[5] = { - -5.447609879822406e+01f, - 1.615858368580409e+02f, - -1.556989798598866e+02f, - 6.680131188771972e+01f, - -1.328068155288572e+01f - }; - static const float c[6] = { - -7.784894002430293e-03f, - -3.223964580411365e-01f, - -2.400758277161838e+00f, - -2.549732539343734e+00f, - 4.374664141464968e+00f, - 2.938163982698783e+00f - }; - static const float d[4] = { - 7.784695709041462e-03f, - 3.224671290700398e-01f, - 2.445134137142996e+00f, - 3.754408661907416e+00f - }; - static const float low = 0.02425f; - static const float high = 0.97575f; - float q, r; - - if (p < 0 || p > 1) { - return 0.0f; - } else if (p == 0) { - return -SIMDE_MATH_INFINITYF; - } else if (p == 1) { - return SIMDE_MATH_INFINITYF; - } else if (p < low) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5f; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } - } - #define simde_math_cdfnorminvf simde_math_cdfnorminvf -#endif - -#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfinv(double x) { - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c - * - * The original answer on SO uses a constant of 0.147, but in my - * testing 0.14829094707965850830078125 gives a lower average absolute error - * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). - * That said, if your goal is to minimize the *maximum* absolute - * error, 0.15449436008930206298828125 provides significantly better - * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); - } - #define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfinvf(float x) { - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); - } - #define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfcinv(double x) { - if(x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - static const double p[6] = { - 0.1550470003116, - 1.382719649631, - 0.690969348887, - -1.128081391617, - 0.680544246825, - -0.16444156791 - }; - static const double q[3] = { - 0.155024849822, - 1.385228141995, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - static const double p[4] = { - 0.00980456202915, - 0.363667889171, - 0.97302949837, - -0.5374947401 - }; - static const double q[3] = { - 0.00980451277802, - 0.363699971544, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } - } - - #define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfcinvf(float x) { - if(x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = { - 0.1550470003116f, - 1.382719649631f, - 0.690969348887f, - -1.128081391617f, - 0.680544246825f - -0.164441567910f - }; - static const float q[3] = { - 0.155024849822f, - 1.385228141995f, - 1.000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = { - 0.00980456202915f, - 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f - }; - static const float q[3] = { - 0.00980451277802f, - 0.36369997154400f, - 1.00000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; - } - } - - #define simde_math_erfcinvf simde_math_erfcinvf -#endif - -static HEDLEY_INLINE -double -simde_math_rad2deg(double radians) { - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE -float -simde_math_rad2degf(float radians) { - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE -double -simde_math_deg2rad(double degrees) { - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE -float -simde_math_deg2radf(float degrees) { - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE -int8_t -simde_math_adds_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); - #else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_adds_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_adds_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_adds_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_adds_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); - #else - uint8_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_adds_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); - #else - uint16_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_adds_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); - #else - uint32_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_adds_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); - #else - uint64_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -int8_t -simde_math_subs_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); - #else - uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - case 32: result = func_name(__VA_ARGS__, 32); break; \ - case 33: result = func_name(__VA_ARGS__, 33); break; \ - case 34: result = func_name(__VA_ARGS__, 34); break; \ - case 35: result = func_name(__VA_ARGS__, 35); break; \ - case 36: result = func_name(__VA_ARGS__, 36); break; \ - case 37: result = func_name(__VA_ARGS__, 37); break; \ - case 38: result = func_name(__VA_ARGS__, 38); break; \ - case 39: result = func_name(__VA_ARGS__, 39); break; \ - case 40: result = func_name(__VA_ARGS__, 40); break; \ - case 41: result = func_name(__VA_ARGS__, 41); break; \ - case 42: result = func_name(__VA_ARGS__, 42); break; \ - case 43: result = func_name(__VA_ARGS__, 43); break; \ - case 44: result = func_name(__VA_ARGS__, 44); break; \ - case 45: result = func_name(__VA_ARGS__, 45); break; \ - case 46: result = func_name(__VA_ARGS__, 46); break; \ - case 47: result = func_name(__VA_ARGS__, 47); break; \ - case 48: result = func_name(__VA_ARGS__, 48); break; \ - case 49: result = func_name(__VA_ARGS__, 49); break; \ - case 50: result = func_name(__VA_ARGS__, 50); break; \ - case 51: result = func_name(__VA_ARGS__, 51); break; \ - case 52: result = func_name(__VA_ARGS__, 52); break; \ - case 53: result = func_name(__VA_ARGS__, 53); break; \ - case 54: result = func_name(__VA_ARGS__, 54); break; \ - case 55: result = func_name(__VA_ARGS__, 55); break; \ - case 56: result = func_name(__VA_ARGS__, 56); break; \ - case 57: result = func_name(__VA_ARGS__, 57); break; \ - case 58: result = func_name(__VA_ARGS__, 58); break; \ - case 59: result = func_name(__VA_ARGS__, 59); break; \ - case 60: result = func_name(__VA_ARGS__, 60); break; \ - case 61: result = func_name(__VA_ARGS__, 61); break; \ - case 62: 
result = func_name(__VA_ARGS__, 62); break; \ - case 63: result = func_name(__VA_ARGS__, 63); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - case 32: func_name(__VA_ARGS__, 32); break; \ - case 33: func_name(__VA_ARGS__, 33); break; \ - case 34: func_name(__VA_ARGS__, 34); break; \ - case 35: func_name(__VA_ARGS__, 35); break; \ - case 36: func_name(__VA_ARGS__, 36); break; \ - case 37: func_name(__VA_ARGS__, 37); break; \ - case 38: func_name(__VA_ARGS__, 38); break; \ 
- case 39: func_name(__VA_ARGS__, 39); break; \ - case 40: func_name(__VA_ARGS__, 40); break; \ - case 41: func_name(__VA_ARGS__, 41); break; \ - case 42: func_name(__VA_ARGS__, 42); break; \ - case 43: func_name(__VA_ARGS__, 43); break; \ - case 44: func_name(__VA_ARGS__, 44); break; \ - case 45: func_name(__VA_ARGS__, 45); break; \ - case 46: func_name(__VA_ARGS__, 46); break; \ - case 47: func_name(__VA_ARGS__, 47); break; \ - case 48: func_name(__VA_ARGS__, 48); break; \ - case 49: func_name(__VA_ARGS__, 49); break; \ - case 50: func_name(__VA_ARGS__, 50); break; \ - case 51: func_name(__VA_ARGS__, 51); break; \ - case 52: func_name(__VA_ARGS__, 52); break; \ - case 53: func_name(__VA_ARGS__, 53); break; \ - case 54: func_name(__VA_ARGS__, 54); break; \ - case 55: func_name(__VA_ARGS__, 55); break; \ - case 56: func_name(__VA_ARGS__, 56); break; \ - case 57: func_name(__VA_ARGS__, 57); break; \ - case 58: func_name(__VA_ARGS__, 58); break; \ - case 59: func_name(__VA_ARGS__, 59); break; \ - case 60: func_name(__VA_ARGS__, 60); break; \ - case 61: func_name(__VA_ARGS__, 61); break; \ - case 62: func_name(__VA_ARGS__, 62); break; \ - case 63: func_name(__VA_ARGS__, 63); break; \ - default: default_case; break; \ - } \ - } while (0) - -HEDLEY_DIAGNOSTIC_POP - -#endif -/* :: End simde/simde-constify.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-align.h :: */ -/* Alignment - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - ********************************************************************** - * - * This is portability layer which should help iron out some - * differences across various compilers, as well as various verisons of - * C and C++. - * - * It was originally developed for SIMD Everywhere - * (), but since its only - * dependency is Hedley (, also CC0) - * it can easily be used in other projects, so please feel free to do - * so. - * - * If you do use this in your project, please keep a link to SIMDe in - * your code to remind you where to report any bugs and/or check for - * updated versions. - * - * # API Overview - * - * The API has several parts, and most macros have a few variations. - * There are APIs for declaring aligned fields/variables, optimization - * hints, and run-time alignment checks. - * - * Briefly, macros ending with "_TO" take numeric values and are great - * when you know the value you would like to use. Macros ending with - * "_LIKE", on the other hand, accept a type and are used when you want - * to use the alignment of a type instead of hardcoding a value. - * - * Documentation for each section of the API is inline. - * - * True to form, MSVC is the main problem and imposes several - * limitations on the effectiveness of the APIs. Detailed descriptions - * of the limitations of each macro are inline, but in general: - * - * * On C11+ or C++11+ code written using this API will work. The - * ASSUME macros may or may not generate a hint to the compiler, but - * that is only an optimization issue and will not actually cause - * failures. - * * If you're using pretty much any compiler other than MSVC, - * everything should basically work as well as in C11/C++11. 
- */ - -#if !defined(SIMDE_ALIGN_H) -#define SIMDE_ALIGN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* I know this seems a little silly, but some non-hosted compilers - * don't have stddef.h, so we try to accomodate them. */ -#if !defined(SIMDE_ALIGN_SIZE_T_) - #if defined(__SIZE_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__SIZE_T_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #else - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #endif -#endif - -#if !defined(SIMDE_ALIGN_INTPTR_T_) - #if defined(__INTPTR_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ - #elif defined(__PTRDIFF_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ - #elif defined(__PTRDIFF_T_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #else - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #endif -#endif - -#if defined(SIMDE_ALIGN_DEBUG) - #if defined(__cplusplus) - #include - #else - #include - #endif -#endif - -/* SIMDE_ALIGN_OF(Type) - * - * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or - * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. - * It isn't defined everywhere (only when the compiler has some alignof- - * like feature we can use to implement it), but it should work in most - * modern compilers, as well as C11 and C++11. - * - * If we can't find an implementation for SIMDE_ALIGN_OF then the macro - * will not be defined, so if you can handle that situation sensibly - * you may need to sprinkle some ifdefs into your code. - */ -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (0 && HEDLEY_HAS_FEATURE(c_alignof)) - #define SIMDE_ALIGN_OF(Type) _Alignof(Type) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) - #define SIMDE_ALIGN_OF(Type) alignof(Type) -#elif \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - defined(__IBM__ALIGNOF__) || \ - defined(__clang__) - #define SIMDE_ALIGN_OF(Type) __alignof__(Type) -#elif \ - HEDLEY_IAR_VERSION_CHECK(8,40,0) - #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(19,0,0) - /* Probably goes back much further, but MS takes down their old docs. - * If you can verify that this works in earlier versions please let - * me know! */ - #define SIMDE_ALIGN_OF(Type) __alignof(Type) -#endif - -/* SIMDE_ALIGN_MAXIMUM: - * - * This is the maximum alignment that the compiler supports. You can - * define the value prior to including SIMDe if necessary, but in that - * case *please* submit an issue so we can add the platform to the - * detection code. - * - * Most compilers are okay with types which are aligned beyond what - * they think is the maximum, as long as the alignment is a power - * of two. 
Older versions of MSVC is the exception, so we need to cap - * the alignment requests at values that the implementation supports. - * - * XL C/C++ will accept values larger than 16 (which is the alignment - * of an AltiVec vector), but will not reliably align to the larger - * value, so so we cap the value at 16 there. - * - * If the compiler accepts any power-of-two value within reason then - * this macro should be left undefined, and the SIMDE_ALIGN_CAP - * macro will just return the value passed to it. */ -#if !defined(SIMDE_ALIGN_MAXIMUM) - #if defined(HEDLEY_MSVC_VERSION) - #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) - // Visual studio 2017 and newer does not need a max - #else - #if defined(_M_IX86) || defined(_M_AMD64) - #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 - #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) - /* VS 2010 is really a guess based on Wikipedia; if anyone can - * test with old VS versions I'd really appreciate it. */ - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 - #else - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif - #elif defined(_M_ARM) || defined(_M_ARM64) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 - #endif - #endif - #elif defined(HEDLEY_IBM_VERSION) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif -#endif - -/* You can mostly ignore these; they're intended for internal use. - * If you do need to use them please let me know; if they fulfill - * a common use case I'll probably drop the trailing underscore - * and make them part of the public API. */ -#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) - #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 - #define SIMDE_ALIGN_64_ 32 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 - #define SIMDE_ALIGN_64_ 16 - #define SIMDE_ALIGN_32_ 16 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 - #define SIMDE_ALIGN_64_ 8 - #define SIMDE_ALIGN_32_ 8 - #define SIMDE_ALIGN_16_ 8 - #define SIMDE_ALIGN_8_ 8 - #else - #error Max alignment expected to be >= 8 - #endif -#else - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 -#endif - -/** - * SIMDE_ALIGN_CAP(Alignment) - * - * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. - */ -#if defined(SIMDE_ALIGN_MAXIMUM) - #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) -#else - #define SIMDE_ALIGN_CAP(Alignment) (Alignment) -#endif - -/* SIMDE_ALIGN_TO(Alignment) - * - * SIMDE_ALIGN_TO is used to declare types or variables. It basically - * maps to the align attribute in most compilers, the align declspec - * in MSVC, or _Alignas/alignas in C11/C++11. - * - * Example: - * - * struct i32x4 { - * SIMDE_ALIGN_TO(16) int32_t values[4]; - * } - * - * Limitations: - * - * MSVC requires that the Alignment parameter be numeric; you can't do - * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is - * unfortunate because that's really how the LIKE macros are - * implemented, and I am not aware of a way to get anything like this - * to work without using the C11/C++11 keywords. 
- * - * It also means that we can't use SIMDE_ALIGN_CAP to limit the - * alignment to the value specified, which MSVC also requires, so on - * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. - * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, - * but should be safe to use on MSVC. - * - * All this is to say that, if you want your code to work on MSVC, you - * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of - * SIMDE_ALIGN_TO(8/16/32/64). - */ -#if \ - HEDLEY_HAS_ATTRIBUTE(aligned) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) -#elif \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) - #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) - #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) - /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); - * the alignment passed to the declspec has to be an integer. */ - #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE -#endif -#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) -#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) -#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) -#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) - -/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) - * - * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's - * std::assume_aligned, or __builtin_assume_aligned. It tells the - * compiler to assume that the provided pointer is aligned to an - * `Alignment`-byte boundary. - * - * If you define SIMDE_ALIGN_DEBUG prior to including this header then - * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't - * integrate with NDEBUG in this header, but it may be a good idea to - * put something like this in your code: - * - * #if !defined(NDEBUG) - * #define SIMDE_ALIGN_DEBUG - * #endif - * #include <.../simde-align.h> - */ -#if \ - HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ - HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ - __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ - __assume_aligned(simde_assume_aligned_t_, Alignment); \ - simde_assume_aligned_t_; \ - })) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) -#else - #if defined(__cplusplus) - template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) - #else - HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) - #endif - { - HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); - return ptr; - } - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) - #else - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) - #endif -#endif - -#if !defined(SIMDE_ALIGN_DEBUG) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) -#else - #include - #if defined(__cplusplus) - template - static HEDLEY_ALWAYS_INLINE - T* - simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #else - static HEDLEY_ALWAYS_INLINE - void* - simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #endif - { - if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { - fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", - file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), - HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), - HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); - } - - return ptr; - } - - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) - #else - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) - #endif -#endif - -/* SIMDE_ALIGN_LIKE(Type) - * SIMDE_ALIGN_LIKE_#(Type) - * - * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros - * except instead of an integer they take a type; basically, it's just - * a more convenient way to do something like: - * - * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - * - * The versions with a numeric suffix will fall back 
on using a numeric - * value in the event we can't use SIMDE_ALIGN_OF(Type). This is - * mainly for MSVC, where __declspec(align()) can't handle anything - * other than hard-coded numeric values. - */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) - #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) -#else - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 -#endif - -/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) - * - * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a - * type instead of a numeric value. */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) - #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) -#endif - -/* SIMDE_ALIGN_CAST(Type, Pointer) - * - * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try - * to silence warnings that some compilers may produce if you try - * to assign to a type with increased alignment requirements. - * - * Note that it does *not* actually attempt to tell the compiler that - * the pointer is aligned like the destination should be; that's the - * job of the next macro. This macro is necessary for stupid APIs - * like _mm_loadu_si128 where the input is a __m128i* but the function - * is specifically for data which isn't necessarily aligned to - * _Alignof(__m128i). - */ -#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ - Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_r_; \ - })) -#else - #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) -#endif - -/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) - * - * This is sort of like a combination of a reinterpret_cast and a - * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell - * the compiler that the pointer is aligned like the specified type - * and casts the pointer to the specified type while suppressing any - * warnings from the compiler about casting to a type with greater - * alignment requirements. - */ -#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) - -#endif /* !defined(SIMDE_ALIGN_H) */ -/* :: End simde/simde-align.h :: */ - -/* In some situations, SIMDe has to make large performance sacrifices - * for small increases in how faithfully it reproduces an API, but - * only a relatively small number of users will actually need the API - * to be completely accurate. The SIMDE_FAST_* options can be used to - * disable these trade-offs. - * - * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or - * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to - * enable some optimizations. Using -ffast-math and/or - * -ffinite-math-only will also enable the relevant options. If you - * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) - #define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) - #if defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_NANS - #elif defined(__FINITE_MATH_ONLY__) - #if __FINITE_MATH_ONLY__ - #define SIMDE_FAST_NANS - #endif - #endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_CONVERSION_RANGE -#endif - -/* Due to differences across platforms, sometimes it can be much - * faster for us to allow spurious floating point exceptions, - * or to no generate them when we should. 
*/ -#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_EXCEPTIONS -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) - #if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ - (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) - #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") - #else - #define SIMDE_REQUIRE_CONSTANT(arg) - #endif -#else - #define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) - /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which - * starts with a double-underscore. This is a system header so we have no - * control over it, but since it's a macro it will emit a diagnostic which - * prevents compilation with -Werror. */ - #if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ - _Static_assert(expr, message); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) - #endif -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) - #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#endif - -/* Statement exprs */ -#if \ - HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ - HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) -#endif - -/* This is just a convenience macro to make it easy to call a single - * function with a specific diagnostic disabled. 
*/ -#if defined(SIMDE_STATEMENT_EXPR_) - #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ - SIMDE_STATEMENT_EXPR_(({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - diagnostic \ - (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#endif - -#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) - #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") -#endif - -#if \ - (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) -# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -# define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -# if \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SCALAR -# define SIMDE_VECTOR_SUBSCRIPT -# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -# define SIMDE_VECTOR_SUBSCRIPT -# elif \ - HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# elif HEDLEY_HAS_ATTRIBUTE(vector_size) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SUBSCRIPT -# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) -# define SIMDE_VECTOR_SCALAR -# endif -# endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) - HEDLEY_DIAGNOSTIC_PUSH - /* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -# endif -# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif - -# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
[Deleted hunk condensed: the remainder of the vendored simde/simde-common.h and its companion headers, removed wholesale with the rest of the bundled SIMDe sources. It covered the SIMDE_VECTORIZE pragma and diagnostic macros, function-attribute and C-declaration wrappers, __int128 and byte-order detection with simde_bswap64, the simde_float32/simde_float64, poly, and bool typedefs, portable simde_memcpy/simde_memset/simde_memcmp fallbacks for freestanding builds, NaN-quieting helpers, and the bit-cast SIMDE_DEFINE_CONVERSION_FUNCTION_ helpers; simde/check.h (the simde_assert_* macro family); simde/debug-trap.h (simde_trap and simde_dbg_assert); the fixed-width-to-__builtin suffix mapping; the per-compiler SIMDE_BUG_* workaround defines; and simde/simde-f16.h, which selects a 16-bit float API (_Float16, __fp16, or a portable uint16_t struct) and provides float32 <-> float16 conversion and classification helpers.]
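The most substantive piece of the hunk condensed above is simde-f16.h's portable float32 -> float16 path, which the header credits to CC0 code by Fabian Giesen. For reference, a minimal standalone C++ sketch of the same round-to-nearest-even bit trick, using a hypothetical helper name (float32_to_float16_bits) rather than SIMDe's API, looks like this:

#include <cstdint>
#include <cstring>

// Hypothetical standalone helper (not part of this package or of SIMDe's API).
// Returns the IEEE binary16 bit pattern of a float, rounding to nearest even.
static inline uint16_t float32_to_float16_bits(float value) {
  uint32_t f;
  std::memcpy(&f, &value, sizeof f);                 // bit-cast float -> uint32

  const uint32_t f32_infty    = 255u << 23;          // float32 infinity bits
  const uint32_t f16_max      = (127u + 16u) << 23;  // 2^16 as float32 bits; above this -> Inf/NaN
  const uint32_t denorm_magic = ((127u - 15u) + (23u - 10u) + 1u) << 23;

  const uint32_t sign = f & 0x80000000u;
  f ^= sign;                                         // work on |value|

  uint16_t h;
  if (f > f16_max) {                                 // overflow: Inf or NaN
    h = static_cast<uint16_t>((f > f32_infty) ? 0x7e00u : 0x7c00u);
  } else if (f < (113u << 23)) {                     // result is subnormal or zero
    float x, magic;
    std::memcpy(&x, &f, sizeof x);
    std::memcpy(&magic, &denorm_magic, sizeof magic);
    x += magic;                                      // FP add aligns the 10 mantissa bits
    std::memcpy(&f, &x, sizeof f);
    h = static_cast<uint16_t>(f - denorm_magic);
  } else {                                           // normal range
    const uint32_t mant_odd = (f >> 13) & 1u;        // lowest kept bit, for ties-to-even
    f -= (127u - 15u) << 23;                         // rebias exponent 127 -> 15
    f += 0xfffu + mant_odd;                          // rounding bias
    h = static_cast<uint16_t>(f >> 13);
  }
  return static_cast<uint16_t>(h | (sign >> 16));
}

The removed header also carried the inverse float16 -> float32 conversion and a simde_fpclasshf classification helper built on the same bit-level reinterpretation.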
[Deleted hunk condensed: simde/simde-bf16.h, providing a bfloat16 type (native __bf16 where SIMDE_ARM_NEON_BF16 is available, otherwise a uint16_t struct), float32 <-> bfloat16 conversions, NaN/Inf constants, and isinf/isnan wrappers; followed by the start of the vendored ARM NEON compatibility types, the SIMDE_ARM_NEON_DECLARE_VECTOR macro and the 64-bit simde_*_private unions.]
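For comparison, the bfloat16 conversion removed in the same hunk keeps only the top 16 bits of the float32 representation and rounds at the first dropped bit; the original did this byte-wise to stay endian-agnostic. The sketch below, again with a hypothetical name (float32_to_bfloat16_bits), writes that truncation the conventional way; the round-to-nearest-even rule and the explicit NaN guard are this sketch's own choices, not the removed code's exact behavior:

#include <cstdint>
#include <cstring>

// Hypothetical standalone helper (not the removed SIMDe routine).
// Returns the bfloat16 bit pattern of a float: keep the top 16 bits,
// round to nearest (ties to even), and keep NaN as a quiet NaN.
static inline uint16_t float32_to_bfloat16_bits(float value) {
  uint32_t f;
  std::memcpy(&f, &value, sizeof f);                 // bit-cast float -> uint32
  if ((f & 0x7fffffffu) > 0x7f800000u)               // NaN: do not let rounding turn it into Inf
    return static_cast<uint16_t>((f >> 16) | 0x0040u);
  f += 0x7fffu + ((f >> 16) & 1u);                   // round to nearest, ties to even
  return static_cast<uint16_t>(f >> 16);
}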
[Deleted hunk condensed: the remaining simde_*_private vector unions (64- and 128-bit, with __m64/__m128i, native NEON, and WASM v128 views where available), the bfloat16x4/bfloat16x8 private types, and the typedef aliases that map the simde_* vector types onto native NEON types when SIMDE_ARM_NEON_A32V7_NATIVE is defined. The deletion of this vendored header continues past this hunk.]
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - typedef float64_t simde_float64_t; - typedef float64x1_t simde_float64x1_t; - typedef float64x2_t simde_float64x2_t; - typedef float64x1x2_t simde_float64x1x2_t; - typedef float64x2x2_t simde_float64x2x2_t; - typedef float64x1x3_t simde_float64x1x3_t; - typedef float64x2x3_t simde_float64x2x3_t; - typedef float64x1x4_t simde_float64x1x4_t; - typedef float64x2x4_t simde_float64x2x4_t; - #else - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN - #endif - -#elif (defined(SIMDE_X86_MMX_NATIVE) || defined(SIMDE_X86_SSE_NATIVE)) && defined(SIMDE_ARM_NEON_FORCE_NATIVE_TYPES) - #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F32 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_64_BIT - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_128_BIT - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_VXN - #define SIMDE_ARM_NEON_NEED_PORTABLE_BF16 - - #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN - - #if defined(SIMDE_X86_MMX_NATIVE) - typedef __m64 simde_int8x8_t; - typedef __m64 simde_int16x4_t; - typedef __m64 simde_int32x2_t; - typedef __m64 simde_int64x1_t; - typedef __m64 simde_uint8x8_t; - typedef __m64 simde_uint16x4_t; - typedef __m64 simde_uint32x2_t; - typedef __m64 simde_uint64x1_t; - typedef __m64 simde_float32x2_t; - typedef __m64 simde_float64x1_t; - #else - #define SIMDE_ARM_NEON_NEED_PORTABLE_I8X8 - #define SIMDE_ARM_NEON_NEED_PORTABLE_I16X4 - #define SIMDE_ARM_NEON_NEED_PORTABLE_I32X2 - #define SIMDE_ARM_NEON_NEED_PORTABLE_I64X1 - #define SIMDE_ARM_NEON_NEED_PORTABLE_U8X8 - #define SIMDE_ARM_NEON_NEED_PORTABLE_U16X4 - #define SIMDE_ARM_NEON_NEED_PORTABLE_U32X2 - #define SIMDE_ARM_NEON_NEED_PORTABLE_U64X1 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F32X2 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1 - #endif - - #if defined(SIMDE_X86_SSE_NATIVE) - typedef __m128 simde_float32x4_t; - #else - #define SIMDE_ARM_NEON_NEED_PORTABLE_F32X4 - #endif - - #if defined(SIMDE_X86_SSE2_NATIVE) - typedef __m128i simde_int8x16_t; - typedef __m128i simde_int16x8_t; - typedef __m128i simde_int32x4_t; - typedef __m128i simde_int64x2_t; - typedef __m128i simde_uint8x16_t; - typedef __m128i simde_uint16x8_t; - typedef __m128i simde_uint32x4_t; - typedef __m128i simde_uint64x2_t; - typedef __m128d simde_float64x2_t; - #else - #define SIMDE_ARM_NEON_NEED_PORTABLE_I8X16 - #define SIMDE_ARM_NEON_NEED_PORTABLE_I16X8 - #define SIMDE_ARM_NEON_NEED_PORTABLE_I32X4 - #define SIMDE_ARM_NEON_NEED_PORTABLE_I64X2 - #define SIMDE_ARM_NEON_NEED_PORTABLE_U8X16 - #define SIMDE_ARM_NEON_NEED_PORTABLE_U16X8 - #define SIMDE_ARM_NEON_NEED_PORTABLE_U32X4 - #define SIMDE_ARM_NEON_NEED_PORTABLE_U64X2 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2 - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_ARM_NEON_FORCE_NATIVE_TYPES) - #define SIMDE_ARM_NEON_NEED_PORTABLE_F32 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_64_BIT - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_128_BIT - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_VXN - #define SIMDE_ARM_NEON_NEED_PORTABLE_BF16 - - #define SIMDE_ARM_NEON_NEED_PORTABLE_64BIT - - #define 
SIMDE_ARM_NEON_NEED_PORTABLE_F16 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN - #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN - - typedef v128_t simde_int8x16_t; - typedef v128_t simde_int16x8_t; - typedef v128_t simde_int32x4_t; - typedef v128_t simde_int64x2_t; - typedef v128_t simde_uint8x16_t; - typedef v128_t simde_uint16x8_t; - typedef v128_t simde_uint32x4_t; - typedef v128_t simde_uint64x2_t; - typedef v128_t simde_float32x4_t; - typedef v128_t simde_float64x2_t; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F32 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_64_BIT - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_128_BIT - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_VXN - #define SIMDE_ARM_NEON_NEED_PORTABLE_BF16 - - #define SIMDE_ARM_NEON_NEED_PORTABLE_64BIT - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN - #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN - - typedef SIMDE_POWER_ALTIVEC_VECTOR(signed char) simde_int8x16_t; - typedef SIMDE_POWER_ALTIVEC_VECTOR(signed short) simde_int16x8_t; - typedef SIMDE_POWER_ALTIVEC_VECTOR(signed int) simde_int32x4_t; - typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) simde_uint8x16_t; - typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) simde_uint16x8_t; - typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) simde_uint32x4_t; - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde_float32x4_t; - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(signed long long) simde_int64x2_t; - typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) simde_uint64x2_t; - typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde_float64x2_t; - #else - #define SIMDE_ARM_NEON_NEED_PORTABLE_I64X2 - #define SIMDE_ARM_NEON_NEED_PORTABLE_U64X2 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2 - #endif -#elif defined(SIMDE_VECTOR) - typedef simde_float32 simde_float32_t; - typedef simde_float64 simde_float64_t; - typedef int8_t simde_int8x8_t SIMDE_VECTOR(8); - typedef int16_t simde_int16x4_t SIMDE_VECTOR(8); - typedef int32_t simde_int32x2_t SIMDE_VECTOR(8); - typedef int64_t simde_int64x1_t SIMDE_VECTOR(8); - typedef uint8_t simde_uint8x8_t SIMDE_VECTOR(8); - typedef uint16_t simde_uint16x4_t SIMDE_VECTOR(8); - typedef uint32_t simde_uint32x2_t SIMDE_VECTOR(8); - typedef uint64_t simde_uint64x1_t SIMDE_VECTOR(8); - typedef simde_float32_t simde_float32x2_t SIMDE_VECTOR(8); - typedef simde_float64_t simde_float64x1_t SIMDE_VECTOR(8); - typedef int8_t simde_int8x16_t SIMDE_VECTOR(16); - typedef int16_t simde_int16x8_t SIMDE_VECTOR(16); - typedef int32_t simde_int32x4_t SIMDE_VECTOR(16); - typedef int64_t simde_int64x2_t SIMDE_VECTOR(16); - typedef uint8_t simde_uint8x16_t SIMDE_VECTOR(16); - typedef uint16_t simde_uint16x8_t SIMDE_VECTOR(16); - typedef uint32_t simde_uint32x4_t SIMDE_VECTOR(16); - typedef uint64_t simde_uint64x2_t SIMDE_VECTOR(16); - typedef simde_float32_t simde_float32x4_t SIMDE_VECTOR(16); - typedef simde_float64_t simde_float64x2_t SIMDE_VECTOR(16); - - #if defined(SIMDE_ARM_NEON_FP16) - typedef simde_float16 simde_float16_t; - typedef simde_float16_t simde_float16x4_t SIMDE_VECTOR(8); - typedef simde_float16_t simde_float16x8_t SIMDE_VECTOR(16); - typedef struct simde_float16x4x2_t { - simde_float16x4_t val[2]; - } 
simde_float16x4x2_t; - typedef struct simde_float16x4x3_t { - simde_float16x4_t val[3]; - } simde_float16x4x3_t; - typedef struct simde_float16x4x4_t { - simde_float16x4_t val[4]; - } simde_float16x4x4_t; - typedef struct simde_float16x8x2_t { - simde_float16x8_t val[2]; - } simde_float16x8x2_t; - typedef struct simde_float16x8x3_t { - simde_float16x8_t val[3]; - } simde_float16x8x3_t; - typedef struct simde_float16x8x4_t { - simde_float16x8_t val[4]; - } simde_float16x8x4_t; - #else - #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 - #endif - - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_64_BIT - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_128_BIT - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_VXN - #define SIMDE_ARM_NEON_NEED_PORTABLE_BF16 - #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN -#else - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_64_BIT - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_128_BIT - #define SIMDE_ARM_NEON_NEED_PORTABLE_POLY_VXN - #define SIMDE_ARM_NEON_NEED_PORTABLE_BF16 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F32 - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 - #define SIMDE_ARM_NEON_NEED_PORTABLE_64BIT - #define SIMDE_ARM_NEON_NEED_PORTABLE_128BIT - - #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN - #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN -#endif - -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_POLY) - typedef simde_poly8 simde_poly8_t; - typedef simde_poly16 simde_poly16_t; - - typedef simde_poly8x8_private simde_poly8x8_t; - typedef simde_poly16x4_private simde_poly16x4_t; - typedef simde_poly8x16_private simde_poly8x16_t; - typedef simde_poly16x8_private simde_poly16x8_t; -#endif - -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_POLY_64_BIT) - typedef simde_poly64 simde_poly64_t; - typedef simde_poly64x1_private simde_poly64x1_t; - typedef simde_poly64x2_private simde_poly64x2_t; - typedef struct simde_poly64x1x2_t { - simde_poly64x1_t val[2]; - } simde_poly64x1x2_t; - typedef struct simde_poly64x2x2_t { - simde_poly64x2_t val[2]; - } simde_poly64x2x2_t; - typedef struct simde_poly64x1x3_t { - simde_poly64x1_t val[3]; - } simde_poly64x1x3_t; - typedef struct simde_poly64x2x3_t { - simde_poly64x2_t val[3]; - } simde_poly64x2x3_t; - typedef struct simde_poly64x1x4_t { - simde_poly64x1_t val[4]; - } simde_poly64x1x4_t; - typedef struct simde_poly64x2x4_t { - simde_poly64x2_t val[4]; - } simde_poly64x2x4_t; -#endif - -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_POLY_128_BIT) - typedef simde_poly128 simde_poly128_t; -#endif - -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_POLY_VXN) - typedef struct simde_poly8x8x2_t { - simde_poly8x8_t val[2]; - } simde_poly8x8x2_t; - typedef struct simde_poly16x4x2_t { - simde_poly16x4_t val[2]; - } simde_poly16x4x2_t; - typedef struct simde_poly8x16x2_t { - simde_poly8x16_t val[2]; - } simde_poly8x16x2_t; - typedef struct simde_poly16x8x2_t { - simde_poly16x8_t val[2]; - } simde_poly16x8x2_t; - - typedef struct simde_poly8x8x3_t { - simde_poly8x8_t val[3]; - } simde_poly8x8x3_t; - typedef struct simde_poly16x4x3_t { - simde_poly16x4_t val[3]; - } simde_poly16x4x3_t; - typedef struct simde_poly8x16x3_t { - simde_poly8x16_t val[3]; - } simde_poly8x16x3_t; - typedef struct simde_poly16x8x3_t { - simde_poly16x8_t val[3]; - } simde_poly16x8x3_t; - - typedef struct simde_poly8x8x4_t { - simde_poly8x8_t val[4]; - } 
simde_poly8x8x4_t; - typedef struct simde_poly16x4x4_t { - simde_poly16x4_t val[4]; - } simde_poly16x4x4_t; - typedef struct simde_poly8x16x4_t { - simde_poly8x16_t val[4]; - } simde_poly8x16x4_t; - typedef struct simde_poly16x8x4_t { - simde_poly16x8_t val[4]; - } simde_poly16x8x4_t; -#endif - -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_BF16) - typedef simde_bfloat16 simde_bfloat16_t; - typedef simde_bfloat16x4_private simde_bfloat16x4_t; - typedef simde_bfloat16x8_private simde_bfloat16x8_t; - typedef struct simde_bfloat16x4x2_t { - simde_bfloat16x4_t val[2]; - } simde_bfloat16x4x2_t; - - typedef struct simde_bfloat16x8x2_t { - simde_bfloat16x8_t val[2]; - } simde_bfloat16x8x2_t; - - typedef struct simde_bfloat16x4x3_t { - simde_bfloat16x4_t val[3]; - } simde_bfloat16x4x3_t; - - typedef struct simde_bfloat16x8x3_t { - simde_bfloat16x8_t val[3]; - } simde_bfloat16x8x3_t; - - typedef struct simde_bfloat16x4x4_t { - simde_bfloat16x4_t val[4]; - } simde_bfloat16x4x4_t; - - typedef struct simde_bfloat16x8x4_t { - simde_bfloat16x8_t val[4]; - } simde_bfloat16x8x4_t; -#endif - -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I8X8) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) - typedef simde_int8x8_private simde_int8x8_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I16X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) - typedef simde_int16x4_private simde_int16x4_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I32X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) - typedef simde_int32x2_private simde_int32x2_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I64X1) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) - typedef simde_int64x1_private simde_int64x1_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U8X8) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) - typedef simde_uint8x8_private simde_uint8x8_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U16X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) - typedef simde_uint16x4_private simde_uint16x4_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U32X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) - typedef simde_uint32x2_private simde_uint32x2_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U64X1) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) - typedef simde_uint64x1_private simde_uint64x1_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F32X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) - typedef simde_float32x2_private simde_float32x2_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64X1) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) - typedef simde_float64x1_private simde_float64x1_t; -#endif - -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I8X16) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) - typedef simde_int8x16_private simde_int8x16_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I16X8) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) - typedef simde_int16x8_private simde_int16x8_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I32X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) - typedef simde_int32x4_private simde_int32x4_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I64X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) - typedef simde_int64x2_private simde_int64x2_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U8X16) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) - typedef simde_uint8x16_private simde_uint8x16_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U16X8) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) - typedef simde_uint16x8_private simde_uint16x8_t; 
-#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U32X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) - typedef simde_uint32x4_private simde_uint32x4_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U64X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) - typedef simde_uint64x2_private simde_uint64x2_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F32X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) - typedef simde_float32x4_private simde_float32x4_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) - typedef simde_float64x2_private simde_float64x2_t; -#endif - -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F16) - typedef simde_float16 simde_float16_t; - typedef simde_float16x4_private simde_float16x4_t; - typedef simde_float16x8_private simde_float16x8_t; - - typedef struct simde_float16x4x2_t { - simde_float16x4_t val[2]; - } simde_float16x4x2_t; - typedef struct simde_float16x4x3_t { - simde_float16x4_t val[3]; - } simde_float16x4x3_t; - typedef struct simde_float16x4x4_t { - simde_float16x4_t val[4]; - } simde_float16x4x4_t; - typedef struct simde_float16x8x2_t { - simde_float16x8_t val[2]; - } simde_float16x8x2_t; - typedef struct simde_float16x8x3_t { - simde_float16x8_t val[3]; - } simde_float16x8x3_t; - typedef struct simde_float16x8x4_t { - simde_float16x8_t val[4]; - } simde_float16x8x4_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F32) - typedef simde_float32 simde_float32_t; -#endif -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64) - typedef simde_float64 simde_float64_t; -#endif - -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_VXN) && !defined(SIMDE_BUG_INTEL_857088) - typedef struct simde_int8x8x2_t { - simde_int8x8_t val[2]; - } simde_int8x8x2_t; - typedef struct simde_int16x4x2_t { - simde_int16x4_t val[2]; - } simde_int16x4x2_t; - typedef struct simde_int32x2x2_t { - simde_int32x2_t val[2]; - } simde_int32x2x2_t; - typedef struct simde_int64x1x2_t { - simde_int64x1_t val[2]; - } simde_int64x1x2_t; - typedef struct simde_uint8x8x2_t { - simde_uint8x8_t val[2]; - } simde_uint8x8x2_t; - typedef struct simde_uint16x4x2_t { - simde_uint16x4_t val[2]; - } simde_uint16x4x2_t; - typedef struct simde_uint32x2x2_t { - simde_uint32x2_t val[2]; - } simde_uint32x2x2_t; - typedef struct simde_uint64x1x2_t { - simde_uint64x1_t val[2]; - } simde_uint64x1x2_t; - typedef struct simde_float32x2x2_t { - simde_float32x2_t val[2]; - } simde_float32x2x2_t; - - typedef struct simde_int8x16x2_t { - simde_int8x16_t val[2]; - } simde_int8x16x2_t; - typedef struct simde_int16x8x2_t { - simde_int16x8_t val[2]; - } simde_int16x8x2_t; - typedef struct simde_int32x4x2_t { - simde_int32x4_t val[2]; - } simde_int32x4x2_t; - typedef struct simde_int64x2x2_t { - simde_int64x2_t val[2]; - } simde_int64x2x2_t; - typedef struct simde_uint8x16x2_t { - simde_uint8x16_t val[2]; - } simde_uint8x16x2_t; - typedef struct simde_uint16x8x2_t { - simde_uint16x8_t val[2]; - } simde_uint16x8x2_t; - typedef struct simde_uint32x4x2_t { - simde_uint32x4_t val[2]; - } simde_uint32x4x2_t; - typedef struct simde_uint64x2x2_t { - simde_uint64x2_t val[2]; - } simde_uint64x2x2_t; - typedef struct simde_float32x4x2_t { - simde_float32x4_t val[2]; - } simde_float32x4x2_t; - - typedef struct simde_int8x8x3_t { - simde_int8x8_t val[3]; - } simde_int8x8x3_t; - typedef struct simde_int16x4x3_t { - simde_int16x4_t val[3]; - } simde_int16x4x3_t; - typedef struct simde_int32x2x3_t { - simde_int32x2_t val[3]; - } simde_int32x2x3_t; - typedef struct simde_int64x1x3_t { - 
simde_int64x1_t val[3]; - } simde_int64x1x3_t; - typedef struct simde_uint8x8x3_t { - simde_uint8x8_t val[3]; - } simde_uint8x8x3_t; - typedef struct simde_uint16x4x3_t { - simde_uint16x4_t val[3]; - } simde_uint16x4x3_t; - typedef struct simde_uint32x2x3_t { - simde_uint32x2_t val[3]; - } simde_uint32x2x3_t; - typedef struct simde_uint64x1x3_t { - simde_uint64x1_t val[3]; - } simde_uint64x1x3_t; - typedef struct simde_float32x2x3_t { - simde_float32x2_t val[3]; - } simde_float32x2x3_t; - - typedef struct simde_int8x16x3_t { - simde_int8x16_t val[3]; - } simde_int8x16x3_t; - typedef struct simde_int16x8x3_t { - simde_int16x8_t val[3]; - } simde_int16x8x3_t; - typedef struct simde_int32x4x3_t { - simde_int32x4_t val[3]; - } simde_int32x4x3_t; - typedef struct simde_int64x2x3_t { - simde_int64x2_t val[3]; - } simde_int64x2x3_t; - typedef struct simde_uint8x16x3_t { - simde_uint8x16_t val[3]; - } simde_uint8x16x3_t; - typedef struct simde_uint16x8x3_t { - simde_uint16x8_t val[3]; - } simde_uint16x8x3_t; - typedef struct simde_uint32x4x3_t { - simde_uint32x4_t val[3]; - } simde_uint32x4x3_t; - typedef struct simde_uint64x2x3_t { - simde_uint64x2_t val[3]; - } simde_uint64x2x3_t; - typedef struct simde_float32x4x3_t { - simde_float32x4_t val[3]; - } simde_float32x4x3_t; - - typedef struct simde_int8x8x4_t { - simde_int8x8_t val[4]; - } simde_int8x8x4_t; - typedef struct simde_int16x4x4_t { - simde_int16x4_t val[4]; - } simde_int16x4x4_t; - typedef struct simde_int32x2x4_t { - simde_int32x2_t val[4]; - } simde_int32x2x4_t; - typedef struct simde_int64x1x4_t { - simde_int64x1_t val[4]; - } simde_int64x1x4_t; - typedef struct simde_uint8x8x4_t { - simde_uint8x8_t val[4]; - } simde_uint8x8x4_t; - typedef struct simde_uint16x4x4_t { - simde_uint16x4_t val[4]; - } simde_uint16x4x4_t; - typedef struct simde_uint32x2x4_t { - simde_uint32x2_t val[4]; - } simde_uint32x2x4_t; - typedef struct simde_uint64x1x4_t { - simde_uint64x1_t val[4]; - } simde_uint64x1x4_t; - typedef struct simde_float32x2x4_t { - simde_float32x2_t val[4]; - } simde_float32x2x4_t; - - typedef struct simde_int8x16x4_t { - simde_int8x16_t val[4]; - } simde_int8x16x4_t; - typedef struct simde_int16x8x4_t { - simde_int16x8_t val[4]; - } simde_int16x8x4_t; - typedef struct simde_int32x4x4_t { - simde_int32x4_t val[4]; - } simde_int32x4x4_t; - typedef struct simde_int64x2x4_t { - simde_int64x2_t val[4]; - } simde_int64x2x4_t; - typedef struct simde_uint8x16x4_t { - simde_uint8x16_t val[4]; - } simde_uint8x16x4_t; - typedef struct simde_uint16x8x4_t { - simde_uint16x8_t val[4]; - } simde_uint16x8x4_t; - typedef struct simde_uint32x4x4_t { - simde_uint32x4_t val[4]; - } simde_uint32x4x4_t; - typedef struct simde_uint64x2x4_t { - simde_uint64x2_t val[4]; - } simde_uint64x2x4_t; - typedef struct simde_float32x4x4_t { - simde_float32x4_t val[4]; - } simde_float32x4x4_t; -#endif - -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN) - typedef struct simde_float64x1x2_t { - simde_float64x1_t val[2]; - } simde_float64x1x2_t; - - typedef struct simde_float64x1x3_t { - simde_float64x1_t val[3]; - } simde_float64x1x3_t; - - typedef struct simde_float64x1x4_t { - simde_float64x1_t val[4]; - } simde_float64x1x4_t; -#endif - -#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN) - typedef struct simde_float64x2x2_t { - simde_float64x2_t val[2]; - } simde_float64x2x2_t; - - typedef struct simde_float64x2x3_t { - simde_float64x2_t val[3]; - } simde_float64x2x3_t; - - typedef struct simde_float64x2x4_t { - simde_float64x2_t val[4]; - } simde_float64x2x4_t; 
-#endif - -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) || defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - typedef simde_float16_t float16_t; -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - typedef simde_float32_t float32_t; - - typedef simde_int8x8_t int8x8_t; - typedef simde_int16x4_t int16x4_t; - typedef simde_int32x2_t int32x2_t; - typedef simde_int64x1_t int64x1_t; - typedef simde_uint8x8_t uint8x8_t; - typedef simde_uint16x4_t uint16x4_t; - typedef simde_uint32x2_t uint32x2_t; - typedef simde_uint64x1_t uint64x1_t; - typedef simde_float32x2_t float32x2_t; - typedef simde_poly8x8_t poly8x8_t; - typedef simde_poly16x4_t poly16x4_t; - - typedef simde_int8x16_t int8x16_t; - typedef simde_int16x8_t int16x8_t; - typedef simde_int32x4_t int32x4_t; - typedef simde_int64x2_t int64x2_t; - typedef simde_uint8x16_t uint8x16_t; - typedef simde_uint16x8_t uint16x8_t; - typedef simde_uint32x4_t uint32x4_t; - typedef simde_uint64x2_t uint64x2_t; - typedef simde_float32x4_t float32x4_t; - typedef simde_poly8x16_t poly8x16_t; - typedef simde_poly16x8_t poly16x8_t; - - typedef simde_int8x8x2_t int8x8x2_t; - typedef simde_int16x4x2_t int16x4x2_t; - typedef simde_int32x2x2_t int32x2x2_t; - typedef simde_int64x1x2_t int64x1x2_t; - typedef simde_uint8x8x2_t uint8x8x2_t; - typedef simde_uint16x4x2_t uint16x4x2_t; - typedef simde_uint32x2x2_t uint32x2x2_t; - typedef simde_uint64x1x2_t uint64x1x2_t; - typedef simde_float32x2x2_t float32x2x2_t; - typedef simde_poly8x8x2_t poly8x8x2_t; - typedef simde_poly16x4x2_t poly16x4x2_t; - - typedef simde_int8x16x2_t int8x16x2_t; - typedef simde_int16x8x2_t int16x8x2_t; - typedef simde_int32x4x2_t int32x4x2_t; - typedef simde_int64x2x2_t int64x2x2_t; - typedef simde_uint8x16x2_t uint8x16x2_t; - typedef simde_uint16x8x2_t uint16x8x2_t; - typedef simde_uint32x4x2_t uint32x4x2_t; - typedef simde_uint64x2x2_t uint64x2x2_t; - typedef simde_float32x4x2_t float32x4x2_t; - typedef simde_poly8x16x2_t poly8x16x2_t; - typedef simde_poly16x8x2_t poly16x8x2_t; - - typedef simde_int8x8x3_t int8x8x3_t; - typedef simde_int16x4x3_t int16x4x3_t; - typedef simde_int32x2x3_t int32x2x3_t; - typedef simde_int64x1x3_t int64x1x3_t; - typedef simde_uint8x8x3_t uint8x8x3_t; - typedef simde_uint16x4x3_t uint16x4x3_t; - typedef simde_uint32x2x3_t uint32x2x3_t; - typedef simde_uint64x1x3_t uint64x1x3_t; - typedef simde_float32x2x3_t float32x2x3_t; - typedef simde_poly8x8x3_t poly8x8x3_t; - typedef simde_poly16x4x3_t poly16x4x3_t; - - typedef simde_int8x16x3_t int8x16x3_t; - typedef simde_int16x8x3_t int16x8x3_t; - typedef simde_int32x4x3_t int32x4x3_t; - typedef simde_int64x2x3_t int64x2x3_t; - typedef simde_uint8x16x3_t uint8x16x3_t; - typedef simde_uint16x8x3_t uint16x8x3_t; - typedef simde_uint32x4x3_t uint32x4x3_t; - typedef simde_uint64x2x3_t uint64x2x3_t; - typedef simde_float32x4x3_t float32x4x3_t; - typedef simde_poly8x16x3_t poly8x16x3_t; - typedef simde_poly16x8x3_t poly16x8x3_t; - - typedef simde_int8x8x4_t int8x8x4_t; - typedef simde_int16x4x4_t int16x4x4_t; - typedef simde_int32x2x4_t int32x2x4_t; - typedef simde_int64x1x4_t int64x1x4_t; - typedef simde_uint8x8x4_t uint8x8x4_t; - typedef simde_uint16x4x4_t uint16x4x4_t; - typedef simde_uint32x2x4_t uint32x2x4_t; - typedef simde_uint64x1x4_t uint64x1x4_t; - typedef simde_float32x2x4_t float32x2x4_t; - typedef simde_poly8x8x4_t poly8x8x4_t; - typedef simde_poly16x4x4_t poly16x4x4_t; - - typedef simde_int8x16x4_t int8x16x4_t; - typedef simde_int16x8x4_t int16x8x4_t; - typedef simde_int32x4x4_t 
int32x4x4_t; - typedef simde_int64x2x4_t int64x2x4_t; - typedef simde_uint8x16x4_t uint8x16x4_t; - typedef simde_uint16x8x4_t uint16x8x4_t; - typedef simde_uint32x4x4_t uint32x4x4_t; - typedef simde_uint64x2x4_t uint64x2x4_t; - typedef simde_float32x4x4_t float32x4x4_t; - typedef simde_poly8x16x4_t poly8x16x4_t; - typedef simde_poly16x8x4_t poly16x8x4_t; -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - typedef simde_poly64x1_t poly64x1_t; - typedef simde_poly64x2_t poly64x2_t; - typedef simde_poly64x1x2_t poly64x1x2_t; - typedef simde_poly64x2x2_t poly64x2x2_t; - typedef simde_poly64x1x3_t poly64x1x3_t; - typedef simde_poly64x2x3_t poly64x2x3_t; - typedef simde_poly64x1x4_t poly64x1x4_t; - typedef simde_poly64x2x4_t poly64x2x4_t; -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - typedef simde_float64_t float64_t; - typedef simde_float16x4_t float16x4_t; - typedef simde_float64x1_t float64x1_t; - typedef simde_float16x8_t float16x8_t; - typedef simde_float64x2_t float64x2_t; - typedef simde_float64x1x2_t float64x1x2_t; - typedef simde_float64x2x2_t float64x2x2_t; - typedef simde_float64x1x3_t float64x1x3_t; - typedef simde_float64x2x3_t float64x2x3_t; - typedef simde_float64x1x4_t float64x1x4_t; - typedef simde_float64x2x4_t float64x2x4_t; -#endif - -#if defined(SIMDE_X86_MMX_NATIVE) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int8x8_to_m64, __m64, simde_int8x8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int16x4_to_m64, __m64, simde_int16x4_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int32x2_to_m64, __m64, simde_int32x2_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int64x1_to_m64, __m64, simde_int64x1_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint8x8_to_m64, __m64, simde_uint8x8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16x4_to_m64, __m64, simde_uint16x4_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32x2_to_m64, __m64, simde_uint32x2_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64x1_to_m64, __m64, simde_uint64x1_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32x2_to_m64, __m64, simde_float32x2_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64x1_to_m64, __m64, simde_float64x1_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int8x8_from_m64, simde_int8x8_t, __m64) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int16x4_from_m64, simde_int16x4_t, __m64) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int32x2_from_m64, simde_int32x2_t, __m64) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int64x1_from_m64, simde_int64x1_t, __m64) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint8x8_from_m64, simde_uint8x8_t, __m64) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16x4_from_m64, simde_uint16x4_t, __m64) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32x2_from_m64, simde_uint32x2_t, __m64) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64x1_from_m64, simde_uint64x1_t, __m64) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32x2_from_m64, simde_float32x2_t, __m64) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64x1_from_m64, simde_float64x1_t, __m64) -#endif -#if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32x4_to_m128, __m128, simde_float32x4_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32x4_from_m128, simde_float32x4_t, __m128) -#endif -#if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int8x16_to_m128i, __m128i, simde_int8x16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int16x8_to_m128i, __m128i, simde_int16x8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int32x4_to_m128i, __m128i, simde_int32x4_t) - 
SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int64x2_to_m128i, __m128i, simde_int64x2_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint8x16_to_m128i, __m128i, simde_uint8x16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16x8_to_m128i, __m128i, simde_uint16x8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32x4_to_m128i, __m128i, simde_uint32x4_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64x2_to_m128i, __m128i, simde_uint64x2_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64x2_to_m128d, __m128d, simde_float64x2_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int8x16_from_m128i, simde_int8x16_t, __m128i) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int16x8_from_m128i, simde_int16x8_t, __m128i) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int32x4_from_m128i, simde_int32x4_t, __m128i) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int64x2_from_m128i, simde_int64x2_t, __m128i) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint8x16_from_m128i, simde_uint8x16_t, __m128i) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16x8_from_m128i, simde_uint16x8_t, __m128i) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32x4_from_m128i, simde_uint32x4_t, __m128i) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64x2_from_m128i, simde_uint64x2_t, __m128i) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64x2_from_m128d, simde_float64x2_t, __m128d) -#endif - -#if defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int8x16_to_v128, v128_t, simde_int8x16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int16x8_to_v128, v128_t, simde_int16x8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int32x4_to_v128, v128_t, simde_int32x4_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int64x2_to_v128, v128_t, simde_int64x2_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint8x16_to_v128, v128_t, simde_uint8x16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16x8_to_v128, v128_t, simde_uint16x8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32x4_to_v128, v128_t, simde_uint32x4_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64x2_to_v128, v128_t, simde_uint64x2_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32x4_to_v128, v128_t, simde_float32x4_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64x2_to_v128, v128_t, simde_float64x2_t) - - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int8x16_from_v128, simde_int8x16_t, v128_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int16x8_from_v128, simde_int16x8_t, v128_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int32x4_from_v128, simde_int32x4_t, v128_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int64x2_from_v128, simde_int64x2_t, v128_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint8x16_from_v128, simde_uint8x16_t, v128_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16x8_from_v128, simde_uint16x8_t, v128_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32x4_from_v128, simde_uint32x4_t, v128_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64x2_from_v128, simde_uint64x2_t, v128_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32x4_from_v128, simde_float32x4_t, v128_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64x2_from_v128, simde_float64x2_t, v128_t) -#endif - -#define SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(T) \ - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_##T##_to_private, simde_##T##_private, simde_##T##_t) \ - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_##T##_from_private, simde_##T##_t, simde_##T##_private) \ - -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int8x8) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int16x4) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int32x2) 
-SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int64x1) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint8x8) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint16x4) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint32x2) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint64x1) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float16x4) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float32x2) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float64x1) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(poly8x8) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(poly16x4) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(poly64x1) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(bfloat16x4) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int8x16) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int16x8) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int32x4) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int64x2) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint8x16) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint16x8) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint32x4) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint64x2) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(poly8x16) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(poly16x8) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(poly64x2) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float16x8) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float32x4) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float64x2) -SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(bfloat16x8) - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_NEON_TYPES_H */ -/* :: End simde/arm/neon/types.h :: */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/aba.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_NEON_ABA_H) -#define SIMDE_ARM_NEON_ABA_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/abd.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ABD_H) -#define SIMDE_ARM_NEON_ABD_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/abs.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ABS_H) -#define SIMDE_ARM_NEON_ABS_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vabsd_s64(int64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,1,0)) - return vabsd_s64(a); - #else - return a < 0 ? 
-a : a; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vabsd_s64 - #define vabsd_s64(a) simde_vabsd_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vabsh_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vabsh_f16(a); - #else - simde_float32_t a_ = simde_float16_to_float32(a); - - return (a_ >= 0.0f) ? simde_float16_from_float32(a_) : simde_float16_from_float32(-a_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vabsh_f16 - #define vabsh_f16(a) simde_vabsh_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vabs_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vabs_f16(a); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vabsh_f16(a_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vabs_f16 - #define vabs_f16(a) simde_vabs_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vabs_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabs_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabs_f32 - #define vabs_f32(a) simde_vabs_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vabs_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vabs_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vabs_f64 - #define vabs_f64(a) simde_vabs_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vabs_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabs_s8(a); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_abs_pi8(a_.m64); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0)); - r_.values = (-a_.values & m) | (a_.values & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] < 0 ? 
-a_.values[i] : a_.values[i]; - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabs_s8 - #define vabs_s8(a) simde_vabs_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vabs_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabs_s16(a); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_abs_pi16(a_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761) - __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0)); - r_.values = (-a_.values & m) | (a_.values & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabs_s16 - #define vabs_s16(a) simde_vabs_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vabs_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabs_s32(a); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_abs_pi32(a_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761) - __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0)); - r_.values = (-a_.values & m) | (a_.values & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabs_s32 - #define vabs_s32(a) simde_vabs_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vabs_s64(simde_int64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vabs_s64(a); - #else - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0)); - r_.values = (-a_.values & m) | (a_.values & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] < 0 ? 
-a_.values[i] : a_.values[i]; - } - #endif - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabs_s64 - #define vabs_s64(a) simde_vabs_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vabsq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vabsq_f16(a); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vabsh_f16(a_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vabsq_f16 - #define vabsq_f16(a) simde_vabsq_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vabsq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabsq_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_abs(a); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_f32x4_abs(a_.v128); - #elif defined(SIMDE_X86_SSE_NATIVE) - simde_float32 mask_; - uint32_t u32_ = UINT32_C(0x7FFFFFFF); - simde_memcpy(&mask_, &u32_, sizeof(u32_)); - r_.m128 = _mm_and_ps(_mm_set1_ps(mask_), a_.m128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_fabsf(a_.values[i]); - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabsq_f32 - #define vabsq_f32(a) simde_vabsq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vabsq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vabsq_f64(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_abs(a); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - - #if defined(SIMDE_X86_SSE2_NATIVE) - simde_float64 mask_; - uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); - simde_memcpy(&mask_, &u64_, sizeof(u64_)); - r_.m128d = _mm_and_pd(_mm_set1_pd(mask_), a_.m128d); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_fabs(a_.values[i]); - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vabsq_f64 - #define vabsq_f64(a) simde_vabsq_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vabsq_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabsq_s8(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_abs(a); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i = _mm_abs_epi8(a_.m128i); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_min_epu8(a_.m128i, _mm_sub_epi8(_mm_setzero_si128(), a_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_abs(a_.v128); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0)); - r_.values = (-a_.values & m) | (a_.values & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] < 0 ? 
-a_.values[i] : a_.values[i]; - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabsq_s8 - #define vabsq_s8(a) simde_vabsq_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vabsq_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabsq_s16(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_abs(a); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i = _mm_abs_epi16(a_.m128i); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_max_epi16(a_.m128i, _mm_sub_epi16(_mm_setzero_si128(), a_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_abs(a_.v128); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0)); - r_.values = (-a_.values & m) | (a_.values & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabsq_s16 - #define vabsq_s16(a) simde_vabsq_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vabsq_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabsq_s32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_abs(a); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i = _mm_abs_epi32(a_.m128i); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a_.m128i); - r_.m128i = _mm_sub_epi32(_mm_xor_si128(a_.m128i, m), m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_abs(a_.v128); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0)); - r_.values = (-a_.values & m) | (a_.values & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] < 0 ? 
HEDLEY_STATIC_CAST(int32_t, 0 - HEDLEY_STATIC_CAST(uint32_t, a_.values[i])) : a_.values[i]; - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabsq_s32 - #define vabsq_s32(a) simde_vabsq_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vabsq_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vabsq_s64(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbslq_s64(vreinterpretq_u64_s64(vshrq_n_s64(a, 63)), vsubq_s64(vdupq_n_s64(0), a), a); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_abs(a); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a); - - #if defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_abs_epi64(a_.m128i); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i m = _mm_srai_epi32(_mm_shuffle_epi32(a_.m128i, 0xF5), 31); - r_.m128i = _mm_sub_epi64(_mm_xor_si128(a_.m128i, m), m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i64x2_abs(a_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0)); - r_.values = (-a_.values & m) | (a_.values & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] < 0 ? HEDLEY_STATIC_CAST(int64_t, 0 - HEDLEY_STATIC_CAST(uint64_t, a_.values[i])) : a_.values[i]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabsq_s64 - #define vabsq_s64(a) simde_vabsq_s64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ABS_H) */ -/* :: End simde/arm/neon/abs.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/subl.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - */ - -#if !defined(SIMDE_ARM_NEON_SUBL_H) -#define SIMDE_ARM_NEON_SUBL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/sub.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_SUB_H) -#define SIMDE_ARM_NEON_SUB_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16 -simde_vsubh_f16(simde_float16_t a, simde_float16_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vsubh_f16(a, b); - #else - simde_float32 af = simde_float16_to_float32(a); - simde_float32 bf = simde_float16_to_float32(b); - return simde_float16_from_float32(af - bf); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vsubh_f16 - #define vsubh_f16(a, b) simde_vsubh_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vsubd_s64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubd_s64(a, b); - #else - return a - b; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubd_s64 - #define vsubd_s64(a, b) simde_vsubd_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vsubd_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubd_u64(a, b); - #else - return a - b; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubd_u64 - #define vsubd_u64(a, b) simde_vsubd_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vsub_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vsub_f16(a, b); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vsubh_f16(a_.values[i], b_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - 
#undef vsub_f16 - #define vsub_f16(a, b) simde_vsub_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vsub_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsub_f32(a, b); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsub_f32 - #define vsub_f32(a, b) simde_vsub_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vsub_f64(simde_float64x1_t a, simde_float64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsub_f64(a, b); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a), - b_ = simde_float64x1_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsub_f64 - #define vsub_f64(a, b) simde_vsub_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vsub_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsub_s8(a, b); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_sub_pi8(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsub_s8 - #define vsub_s8(a, b) simde_vsub_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vsub_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsub_s16(a, b); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_sub_pi16(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsub_s16 - #define vsub_s16(a, b) simde_vsub_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vsub_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsub_s32(a, b); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_sub_pi32(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - 
SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsub_s32 - #define vsub_s32(a, b) simde_vsub_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vsub_s64(simde_int64x1_t a, simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsub_s64(a, b); - #else - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vsubd_s64(a_.values[i], b_.values[i]); - } - #endif - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsub_s64 - #define vsub_s64(a, b) simde_vsub_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vsub_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsub_u8(a, b); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_sub_pi8(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsub_u8 - #define vsub_u8(a, b) simde_vsub_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vsub_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsub_u16(a, b); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_sub_pi16(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsub_u16 - #define vsub_u16(a, b) simde_vsub_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vsub_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsub_u32(a, b); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_sub_pi32(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsub_u32 - #define vsub_u32(a, b) simde_vsub_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vsub_u64(simde_uint64x1_t a, simde_uint64x1_t b) { 
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsub_u64(a, b); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vsubd_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsub_u64 - #define vsub_u64(a, b) simde_vsub_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vsubq_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vsubq_f16(a, b); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - simde_float32_t tmp_a_ = simde_float16_to_float32(a_.values[i]); - simde_float32_t tmp_b_ = simde_float16_to_float32(b_.values[i]); - r_.values[i] = simde_float16_from_float32(tmp_a_ - tmp_b_); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vsubq_f16 - #define vsubq_f16(a, b) simde_vsubq_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vsubq_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubq_f32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(float) a_ , b_, r_; - a_ = a; - b_ = b; - r_ = vec_sub(a_, b_); - return r_; - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - #if defined(SIMDE_X86_SSE_NATIVE) - r_.m128 = _mm_sub_ps(a_.m128, b_.m128); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_f32x4_sub(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubq_f32 - #define vsubq_f32(a, b) simde_vsubq_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vsubq_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubq_f64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_sub(a, b); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128d = _mm_sub_pd(a_.m128d, b_.m128d); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_f64x2_sub(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubq_f64 - #define vsubq_f64(a, b) simde_vsubq_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vsubq_s8(simde_int8x16_t 
a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubq_s8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sub(a, b); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_sub_epi8(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_sub(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubq_s8 - #define vsubq_s8(a, b) simde_vsubq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vsubq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubq_s16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sub(a, b); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_sub_epi16(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_sub(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubq_s16 - #define vsubq_s16(a, b) simde_vsubq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vsubq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubq_s32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sub(a, b); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_sub_epi32(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_sub(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubq_s32 - #define vsubq_s32(a, b) simde_vsubq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vsubq_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubq_s64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return vec_sub(a, b); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_sub_epi64(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i64x2_sub(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = 
simde_vsubd_s64(a_.values[i], b_.values[i]); - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubq_s64 - #define vsubq_s64(a, b) simde_vsubq_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vsubq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubq_u8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sub(a, b); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubq_u8 - #define vsubq_u8(a, b) simde_vsubq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vsubq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubq_u16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sub(a, b); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubq_u16 - #define vsubq_u16(a, b) simde_vsubq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vsubq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubq_u32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sub(a, b); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubq_u32 - #define vsubq_u32(a, b) simde_vsubq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vsubq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubq_u64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return vec_sub(a, b); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values - b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vsubd_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubq_u64 - #define vsubq_u64(a, b) simde_vsubq_u64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_SUB_H) */ -/* :: End 
simde/arm/neon/sub.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/movl.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - */ - -#if !defined(SIMDE_ARM_NEON_MOVL_H) -#define SIMDE_ARM_NEON_MOVL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/combine.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_COMBINE_H) -#define SIMDE_ARM_NEON_COMBINE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcombine_f16(simde_float16x4_t low, simde_float16x4_t high) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcombine_f16(low, high); - #else - simde_float16x8_private r_; - simde_float16x4_private - low_ = simde_float16x4_to_private(low), - high_ = simde_float16x4_to_private(high); - - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcombine_f16 - #define vcombine_f16(low, high) simde_vcombine_f16((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcombine_f32(simde_float32x2_t low, simde_float32x2_t high) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcombine_f32(low, high); - #else - simde_float32x4_private r_; - simde_float32x2_private - low_ = simde_float32x2_to_private(low), - high_ = simde_float32x2_to_private(high); - - /* Note: __builtin_shufflevector can have a the output contain - * twice the number of elements, __builtin_shuffle cannot. - * Using SIMDE_SHUFFLE_VECTOR_ here would not work. */ - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3); - #else - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcombine_f32 - #define vcombine_f32(low, high) simde_vcombine_f32((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vcombine_f64(simde_float64x1_t low, simde_float64x1_t high) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcombine_f64(low, high); - #else - simde_float64x2_private r_; - simde_float64x1_private - low_ = simde_float64x1_to_private(low), - high_ = simde_float64x1_to_private(high); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1); - #else - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcombine_f64 - #define vcombine_f64(low, high) simde_vcombine_f64((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vcombine_s8(simde_int8x8_t low, simde_int8x8_t high) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcombine_s8(low, high); - #else - simde_int8x16_private r_; - simde_int8x8_private - low_ = 
simde_int8x8_to_private(low), - high_ = simde_int8x8_to_private(high); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - #else - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcombine_s8 - #define vcombine_s8(low, high) simde_vcombine_s8((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vcombine_s16(simde_int16x4_t low, simde_int16x4_t high) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcombine_s16(low, high); - #else - simde_int16x8_private r_; - simde_int16x4_private - low_ = simde_int16x4_to_private(low), - high_ = simde_int16x4_to_private(high); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7); - #else - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcombine_s16 - #define vcombine_s16(low, high) simde_vcombine_s16((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vcombine_s32(simde_int32x2_t low, simde_int32x2_t high) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcombine_s32(low, high); - #else - simde_int32x4_private r_; - simde_int32x2_private - low_ = simde_int32x2_to_private(low), - high_ = simde_int32x2_to_private(high); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3); - #else - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcombine_s32 - #define vcombine_s32(low, high) simde_vcombine_s32((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vcombine_s64(simde_int64x1_t low, simde_int64x1_t high) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcombine_s64(low, high); - #else - simde_int64x2_private r_; - simde_int64x1_private - low_ = simde_int64x1_to_private(low), - high_ = simde_int64x1_to_private(high); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1); - #else - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcombine_s64 - #define vcombine_s64(low, high) simde_vcombine_s64((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde_uint8x16_t -simde_vcombine_u8(simde_uint8x8_t low, simde_uint8x8_t high) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcombine_u8(low, high); - #else - simde_uint8x16_private r_; - simde_uint8x8_private - low_ = simde_uint8x8_to_private(low), - high_ = simde_uint8x8_to_private(high); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - #else - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcombine_u8 - #define vcombine_u8(low, high) simde_vcombine_u8((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vcombine_u16(simde_uint16x4_t low, simde_uint16x4_t high) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcombine_u16(low, high); - #else - simde_uint16x8_private r_; - simde_uint16x4_private - low_ = simde_uint16x4_to_private(low), - high_ = simde_uint16x4_to_private(high); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7); - #else - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcombine_u16 - #define vcombine_u16(low, high) simde_vcombine_u16((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcombine_u32(simde_uint32x2_t low, simde_uint32x2_t high) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcombine_u32(low, high); - #else - simde_uint32x4_private r_; - simde_uint32x2_private - low_ = simde_uint32x2_to_private(low), - high_ = simde_uint32x2_to_private(high); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3); - #else - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcombine_u32 - #define vcombine_u32(low, high) simde_vcombine_u32((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcombine_u64(simde_uint64x1_t low, simde_uint64x1_t high) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcombine_u64(low, high); - #else - simde_uint64x2_private r_; - simde_uint64x1_private - low_ = simde_uint64x1_to_private(low), - high_ = simde_uint64x1_to_private(high); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1); - #else - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - #endif 
- - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcombine_u64 - #define vcombine_u64(low, high) simde_vcombine_u64((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vcombine_p8(simde_poly8x8_t low, simde_poly8x8_t high) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcombine_p8(low, high); - #else - simde_poly8x16_private r_; - simde_poly8x8_private - low_ = simde_poly8x8_to_private(low), - high_ = simde_poly8x8_to_private(high); - - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcombine_p8 - #define vcombine_p8(low, high) simde_vcombine_p8((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vcombine_p16(simde_poly16x4_t low, simde_poly16x4_t high) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcombine_p16(low, high); - #else - simde_poly16x8_private r_; - simde_poly16x4_private - low_ = simde_poly16x4_to_private(low), - high_ = simde_poly16x4_to_private(high); - - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcombine_p16 - #define vcombine_p16(low, high) simde_vcombine_p16((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vcombine_p64(simde_poly64x1_t low, simde_poly64x1_t high) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vcombine_p64(low, high); - #else - simde_poly64x2_private r_; - simde_poly64x1_private - low_ = simde_poly64x1_to_private(low), - high_ = simde_poly64x1_to_private(high); - - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcombine_p64 - #define vcombine_p64(low, high) simde_vcombine_p64((low), (high)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8_t -simde_vcombine_bf16(simde_bfloat16x4_t low, simde_bfloat16x4_t high) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vcombine_bf16(low, high); - #else - simde_bfloat16x8_private r_; - simde_bfloat16x4_private - low_ = simde_bfloat16x4_to_private(low), - high_ = simde_bfloat16x4_to_private(high); - - size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - r_.values[i] = low_.values[i]; - r_.values[i + halfway] = high_.values[i]; - } - - return simde_bfloat16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcombine_bf16 - #define vcombine_bf16(low, high) simde_vcombine_bf16((low), (high)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_COMBINE_H) */ -/* :: End simde/arm/neon/combine.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t 
-simde_vmovl_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmovl_s8(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde_int16x8_private r_; - simde_int8x16_private a_ = simde_int8x16_to_private(simde_vcombine_s8(a, a)); - - r_.v128 = wasm_i16x8_extend_low_i8x16(a_.v128); - - return simde_int16x8_from_private(r_); - #else - simde_int16x8_private r_; - simde_int8x8_private a_ = simde_int8x8_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - SIMDE_CONVERT_VECTOR_(r_.values, a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]); - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmovl_s8 - #define vmovl_s8(a) simde_vmovl_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vmovl_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmovl_s16(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde_int32x4_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(simde_vcombine_s16(a, a)); - - r_.v128 = wasm_i32x4_extend_low_i16x8(a_.v128); - - return simde_int32x4_from_private(r_); - #else - simde_int32x4_private r_; - simde_int16x4_private a_ = simde_int16x4_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - SIMDE_CONVERT_VECTOR_(r_.values, a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]); - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmovl_s16 - #define vmovl_s16(a) simde_vmovl_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vmovl_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmovl_s32(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde_int64x2_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(simde_vcombine_s32(a, a)); - - r_.v128 = wasm_i64x2_extend_low_i32x4(a_.v128); - - return simde_int64x2_from_private(r_); - #else - simde_int64x2_private r_; - simde_int32x2_private a_ = simde_int32x2_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]); - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmovl_s32 - #define vmovl_s32(a) simde_vmovl_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vmovl_u8(simde_uint8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmovl_u8(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde_uint16x8_private r_; - simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_vcombine_u8(a, a)); - - r_.v128 = wasm_u16x8_extend_low_u8x16(a_.v128); - - return simde_uint16x8_from_private(r_); - #else - simde_uint16x8_private r_; - simde_uint8x8_private a_ = simde_uint8x8_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - SIMDE_CONVERT_VECTOR_(r_.values, a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - 
r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]); - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmovl_u8 - #define vmovl_u8(a) simde_vmovl_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vmovl_u16(simde_uint16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmovl_u16(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde_uint32x4_private r_; - simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_vcombine_u16(a, a)); - - r_.v128 = wasm_u32x4_extend_low_u16x8(a_.v128); - - return simde_uint32x4_from_private(r_); - #else - simde_uint32x4_private r_; - simde_uint16x4_private a_ = simde_uint16x4_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - SIMDE_CONVERT_VECTOR_(r_.values, a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmovl_u16 - #define vmovl_u16(a) simde_vmovl_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vmovl_u32(simde_uint32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmovl_u32(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde_uint64x2_private r_; - simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_vcombine_u32(a, a)); - - r_.v128 = wasm_u64x2_extend_low_u32x4(a_.v128); - - return simde_uint64x2_from_private(r_); - #else - simde_uint64x2_private r_; - simde_uint32x2_private a_ = simde_uint32x2_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmovl_u32 - #define vmovl_u32(a) simde_vmovl_u32((a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MOVL_H) */ -/* :: End simde/arm/neon/movl.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/movl_high.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - */ - -#if !defined(SIMDE_ARM_NEON_MOVL_HIGH_H) -#define SIMDE_ARM_NEON_MOVL_HIGH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/get_high.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_GET_HIGH_H) -#define SIMDE_ARM_NEON_GET_HIGH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vget_high_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vget_high_f16(a); - #else - simde_float16x4_private r_; - simde_float16x8_private a_ = simde_float16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_high_f16 - #define vget_high_f16(a) simde_vget_high_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vget_high_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_high_f32(a); - #else - simde_float32x2_private r_; - simde_float32x4_private a_ = simde_float32x4_to_private(a); - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 2, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - #endif - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_high_f32 - #define vget_high_f32(a) simde_vget_high_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vget_high_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vget_high_f64(a); - #else - simde_float64x1_private r_; - simde_float64x2_private a_ = simde_float64x2_to_private(a); - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - #endif - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vget_high_f64 - #define vget_high_f64(a) simde_vget_high_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vget_high_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_high_s8(a); - #else - simde_int8x8_private r_; - simde_int8x16_private a_ = simde_int8x16_to_private(a); - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 8, 9, 10, 11, 12, 13, 14, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_high_s8 - #define vget_high_s8(a) simde_vget_high_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vget_high_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return 
vget_high_s16(a); - #else - simde_int16x4_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 4, 5, 6, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_high_s16 - #define vget_high_s16(a) simde_vget_high_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vget_high_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_high_s32(a); - #else - simde_int32x2_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 2, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_high_s32 - #define vget_high_s32(a) simde_vget_high_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vget_high_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_high_s64(a); - #else - simde_int64x1_private r_; - simde_int64x2_private a_ = simde_int64x2_to_private(a); - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - #endif - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_high_s64 - #define vget_high_s64(a) simde_vget_high_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vget_high_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_high_u8(a); - #else - simde_uint8x8_private r_; - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 8, 9, 10, 11, 12, 13, 14,15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_high_u8 - #define vget_high_u8(a) simde_vget_high_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vget_high_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_high_u16(a); - #else - simde_uint16x4_private r_; - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 4, 5, 6, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} 
-#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_high_u16 - #define vget_high_u16(a) simde_vget_high_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vget_high_u32(simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_high_u32(a); - #else - simde_uint32x2_private r_; - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 2, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_high_u32 - #define vget_high_u32(a) simde_vget_high_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vget_high_u64(simde_uint64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_high_u64(a); - #else - simde_uint64x1_private r_; - simde_uint64x2_private a_ = simde_uint64x2_to_private(a); - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_high_u64 - #define vget_high_u64(a) simde_vget_high_u64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vget_high_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_high_p8(a); - #else - simde_poly8x8_private r_; - simde_poly8x16_private a_ = simde_poly8x16_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_high_p8 - #define vget_high_p8(a) simde_vget_high_p8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vget_high_p16(simde_poly16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_high_p16(a); - #else - simde_poly16x4_private r_; - simde_poly16x8_private a_ = simde_poly16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_high_p16 - #define vget_high_p16(a) simde_vget_high_p16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t -simde_vget_high_p64(simde_poly64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vget_high_p64(a); - #else - simde_poly64x1_private r_; - simde_poly64x2_private a_ = simde_poly64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - - return simde_poly64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vget_high_p64 - #define vget_high_p64(a) simde_vget_high_p64((a)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x4_t -simde_vget_high_bf16(simde_bfloat16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vget_high_bf16(a); - #else - simde_bfloat16x4_private r_; - simde_bfloat16x8_private a_ = simde_bfloat16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; - } - - return simde_bfloat16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vget_high_bf16 - #define vget_high_bf16(a) simde_vget_high_bf16((a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_GET_HIGH_H) */ -/* :: End simde/arm/neon/get_high.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vmovl_high_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmovl_high_s8(a); - #else - return simde_vmovl_s8(simde_vget_high_s8(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmovl_high_s8 - #define vmovl_high_s8(a) simde_vmovl_high_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vmovl_high_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmovl_high_s16(a); - #else - return simde_vmovl_s16(simde_vget_high_s16(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmovl_high_s16 - #define vmovl_high_s16(a) simde_vmovl_high_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vmovl_high_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmovl_high_s32(a); - #else - return simde_vmovl_s32(simde_vget_high_s32(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmovl_high_s32 - #define vmovl_high_s32(a) simde_vmovl_high_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vmovl_high_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmovl_high_u8(a); - #else - return simde_vmovl_u8(simde_vget_high_u8(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmovl_high_u8 - #define vmovl_high_u8(a) simde_vmovl_high_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vmovl_high_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmovl_high_u16(a); - #else - return simde_vmovl_u16(simde_vget_high_u16(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmovl_high_u16 - #define vmovl_high_u16(a) simde_vmovl_high_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vmovl_high_u32(simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmovl_high_u32(a); - #else - return simde_vmovl_u32(simde_vget_high_u32(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmovl_high_u32 - #define vmovl_high_u32(a) simde_vmovl_high_u32((a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MOVL_HIGH_H) */ -/* :: End simde/arm/neon/movl_high.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vsubl_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubl_s8(a, b); - #else - return simde_vsubq_s16(simde_vmovl_s8(a), simde_vmovl_s8(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubl_s8 - #define vsubl_s8(a, b) simde_vsubl_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vsubl_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubl_s16(a, b); - #else - return simde_vsubq_s32(simde_vmovl_s16(a), simde_vmovl_s16(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubl_s16 - #define vsubl_s16(a, b) simde_vsubl_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vsubl_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubl_s32(a, b); - #else - return simde_vsubq_s64(simde_vmovl_s32(a), simde_vmovl_s32(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubl_s32 - #define vsubl_s32(a, b) simde_vsubl_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vsubl_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubl_u8(a, b); - #else - return simde_vsubq_u16(simde_vmovl_u8(a), simde_vmovl_u8(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubl_u8 - #define vsubl_u8(a, b) simde_vsubl_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vsubl_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubl_u16(a, b); - #else - return simde_vsubq_u32(simde_vmovl_u16(a), simde_vmovl_u16(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubl_u16 - #define vsubl_u16(a, b) simde_vsubl_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vsubl_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubl_u32(a, b); - #else - return simde_vsubq_u64(simde_vmovl_u32(a), simde_vmovl_u32(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubl_u32 - #define vsubl_u32(a, b) simde_vsubl_u32((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_SUBL_H) */ -/* :: End simde/arm/neon/subl.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/movn.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_NEON_MOVN_H) -#define SIMDE_ARM_NEON_MOVN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vmovn_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmovn_s16(a); - #else - simde_int8x8_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i]); - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmovn_s16 - #define vmovn_s16(a) simde_vmovn_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vmovn_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmovn_s32(a); - #else - simde_int16x4_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]); - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmovn_s32 - #define vmovn_s32(a) simde_vmovn_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vmovn_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmovn_s64(a); - #else - simde_int32x2_private r_; - simde_int64x2_private a_ = simde_int64x2_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]); - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmovn_s64 - #define vmovn_s64(a) simde_vmovn_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vmovn_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmovn_u16(a); - #else - simde_uint8x8_private r_; - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, a_.values[i]); - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmovn_u16 - #define vmovn_u16(a) simde_vmovn_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vmovn_u32(simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmovn_u32(a); - #else - simde_uint16x4_private r_; - simde_uint32x4_private a_ = 
simde_uint32x4_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]); - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmovn_u32 - #define vmovn_u32(a) simde_vmovn_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vmovn_u64(simde_uint64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmovn_u64(a); - #else - simde_uint32x2_private r_; - simde_uint64x2_private a_ = simde_uint64x2_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]); - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmovn_u64 - #define vmovn_u64(a) simde_vmovn_u64((a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MOVN_H) */ -/* :: End simde/arm/neon/movn.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/reinterpret.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - - -#if !defined(SIMDE_ARM_NEON_REINTERPRET_H) -#define SIMDE_ARM_NEON_REINTERPRET_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vreinterpret_s8_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s8_s16(a); - #else - simde_int8x8_private r_; - simde_int16x4_private a_ = simde_int16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s8_s16 - #define vreinterpret_s8_s16 simde_vreinterpret_s8_s16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vreinterpret_s8_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s8_s32(a); - #else - simde_int8x8_private r_; - simde_int32x2_private a_ = simde_int32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s8_s32 - #define vreinterpret_s8_s32 simde_vreinterpret_s8_s32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vreinterpret_s8_s64(simde_int64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s8_s64(a); - #else - simde_int8x8_private r_; - simde_int64x1_private a_ = simde_int64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s8_s64 - #define vreinterpret_s8_s64 simde_vreinterpret_s8_s64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vreinterpret_s8_u8(simde_uint8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s8_u8(a); - #else - simde_int8x8_private r_; - simde_uint8x8_private a_ = simde_uint8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s8_u8 - #define vreinterpret_s8_u8 simde_vreinterpret_s8_u8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vreinterpret_s8_u16(simde_uint16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s8_u16(a); - #else - simde_int8x8_private r_; - simde_uint16x4_private a_ = simde_uint16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s8_u16 - #define vreinterpret_s8_u16 simde_vreinterpret_s8_u16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vreinterpret_s8_u32(simde_uint32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s8_u32(a); - #else - simde_int8x8_private r_; - simde_uint32x2_private a_ = simde_uint32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s8_u32 - #define vreinterpret_s8_u32 simde_vreinterpret_s8_u32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vreinterpret_s8_u64(simde_uint64x1_t a) { - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s8_u64(a); - #else - simde_int8x8_private r_; - simde_uint64x1_private a_ = simde_uint64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s8_u64 - #define vreinterpret_s8_u64 simde_vreinterpret_s8_u64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vreinterpret_s8_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s8_f32(a); - #else - simde_int8x8_private r_; - simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s8_f32 - #define vreinterpret_s8_f32 simde_vreinterpret_s8_f32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vreinterpret_s8_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpret_s8_f64(a); - #else - simde_int8x8_private r_; - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s8_f64 - #define vreinterpret_s8_f64 simde_vreinterpret_s8_f64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vreinterpretq_s8_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s8_s16(a); - #else - simde_int8x16_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s8_s16 - #define vreinterpretq_s8_s16(a) simde_vreinterpretq_s8_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vreinterpretq_s8_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s8_s32(a); - #else - simde_int8x16_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s8_s32 - #define vreinterpretq_s8_s32(a) simde_vreinterpretq_s8_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vreinterpretq_s8_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s8_s64(a); - #else - simde_int8x16_private r_; - simde_int64x2_private a_ = simde_int64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s8_s64 - #define vreinterpretq_s8_s64(a) simde_vreinterpretq_s8_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vreinterpretq_s8_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s8_u8(a); - #else - simde_int8x16_private r_; - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s8_u8 - #define vreinterpretq_s8_u8(a) simde_vreinterpretq_s8_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t 
-simde_vreinterpretq_s8_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s8_u16(a); - #else - simde_int8x16_private r_; - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s8_u16 - #define vreinterpretq_s8_u16(a) simde_vreinterpretq_s8_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vreinterpretq_s8_u32(simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s8_u32(a); - #else - simde_int8x16_private r_; - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s8_u32 - #define vreinterpretq_s8_u32(a) simde_vreinterpretq_s8_u32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vreinterpretq_s8_u64(simde_uint64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s8_u64(a); - #else - simde_int8x16_private r_; - simde_uint64x2_private a_ = simde_uint64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s8_u64 - #define vreinterpretq_s8_u64(a) simde_vreinterpretq_s8_u64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vreinterpretq_s8_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s8_f32(a); - #else - simde_int8x16_private r_; - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s8_f32 - #define vreinterpretq_s8_f32(a) simde_vreinterpretq_s8_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vreinterpretq_s8_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_s8_f64(a); - #else - simde_int8x16_private r_; - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s8_f64 - #define vreinterpretq_s8_f64(a) simde_vreinterpretq_s8_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vreinterpret_s16_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s16_s8(a); - #else - simde_int16x4_private r_; - simde_int8x8_private a_ = simde_int8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s16_s8 - #define vreinterpret_s16_s8 simde_vreinterpret_s16_s8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vreinterpret_s16_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s16_s32(a); - #else - simde_int16x4_private r_; - simde_int32x2_private a_ = simde_int32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s16_s32 - #define vreinterpret_s16_s32 
simde_vreinterpret_s16_s32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vreinterpret_s16_s64(simde_int64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s16_s64(a); - #else - simde_int16x4_private r_; - simde_int64x1_private a_ = simde_int64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s16_s64 - #define vreinterpret_s16_s64 simde_vreinterpret_s16_s64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vreinterpret_s16_u8(simde_uint8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s16_u8(a); - #else - simde_int16x4_private r_; - simde_uint8x8_private a_ = simde_uint8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s16_u8 - #define vreinterpret_s16_u8 simde_vreinterpret_s16_u8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vreinterpret_s16_u16(simde_uint16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s16_u16(a); - #else - simde_int16x4_private r_; - simde_uint16x4_private a_ = simde_uint16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s16_u16 - #define vreinterpret_s16_u16 simde_vreinterpret_s16_u16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vreinterpret_s16_u32(simde_uint32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s16_u32(a); - #else - simde_int16x4_private r_; - simde_uint32x2_private a_ = simde_uint32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s16_u32 - #define vreinterpret_s16_u32 simde_vreinterpret_s16_u32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vreinterpret_s16_u64(simde_uint64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s16_u64(a); - #else - simde_int16x4_private r_; - simde_uint64x1_private a_ = simde_uint64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s16_u64 - #define vreinterpret_s16_u64 simde_vreinterpret_s16_u64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vreinterpret_s16_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s16_f32(a); - #else - simde_int16x4_private r_; - simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s16_f32 - #define vreinterpret_s16_f32 simde_vreinterpret_s16_f32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vreinterpret_s16_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpret_s16_f64(a); - #else - simde_int16x4_private r_; - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s16_f64 
- #define vreinterpret_s16_f64 simde_vreinterpret_s16_f64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vreinterpretq_s16_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s16_s8(a); - #else - simde_int16x8_private r_; - simde_int8x16_private a_ = simde_int8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s16_s8 - #define vreinterpretq_s16_s8(a) simde_vreinterpretq_s16_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vreinterpretq_s16_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s16_s32(a); - #else - simde_int16x8_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s16_s32 - #define vreinterpretq_s16_s32(a) simde_vreinterpretq_s16_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vreinterpretq_s16_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s16_s64(a); - #else - simde_int16x8_private r_; - simde_int64x2_private a_ = simde_int64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s16_s64 - #define vreinterpretq_s16_s64(a) simde_vreinterpretq_s16_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vreinterpretq_s16_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s16_u8(a); - #else - simde_int16x8_private r_; - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s16_u8 - #define vreinterpretq_s16_u8(a) simde_vreinterpretq_s16_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vreinterpretq_s16_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s16_u16(a); - #else - simde_int16x8_private r_; - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s16_u16 - #define vreinterpretq_s16_u16(a) simde_vreinterpretq_s16_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vreinterpretq_s16_u32(simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s16_u32(a); - #else - simde_int16x8_private r_; - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s16_u32 - #define vreinterpretq_s16_u32(a) simde_vreinterpretq_s16_u32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vreinterpretq_s16_u64(simde_uint64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s16_u64(a); - #else - simde_int16x8_private r_; - simde_uint64x2_private a_ = simde_uint64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x8_from_private(r_); - #endif -} 
-#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s16_u64 - #define vreinterpretq_s16_u64(a) simde_vreinterpretq_s16_u64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vreinterpretq_s16_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s16_f32(a); - #else - simde_int16x8_private r_; - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s16_f32 - #define vreinterpretq_s16_f32(a) simde_vreinterpretq_s16_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vreinterpretq_s16_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_s16_f64(a); - #else - simde_int16x8_private r_; - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s16_f64 - #define vreinterpretq_s16_f64(a) simde_vreinterpretq_s16_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vreinterpret_s32_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s32_s8(a); - #else - simde_int32x2_private r_; - simde_int8x8_private a_ = simde_int8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s32_s8 - #define vreinterpret_s32_s8 simde_vreinterpret_s32_s8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vreinterpret_s32_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s32_s16(a); - #else - simde_int32x2_private r_; - simde_int16x4_private a_ = simde_int16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s32_s16 - #define vreinterpret_s32_s16 simde_vreinterpret_s32_s16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vreinterpret_s32_s64(simde_int64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s32_s64(a); - #else - simde_int32x2_private r_; - simde_int64x1_private a_ = simde_int64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s32_s64 - #define vreinterpret_s32_s64 simde_vreinterpret_s32_s64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vreinterpret_s32_u8(simde_uint8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s32_u8(a); - #else - simde_int32x2_private r_; - simde_uint8x8_private a_ = simde_uint8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s32_u8 - #define vreinterpret_s32_u8 simde_vreinterpret_s32_u8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vreinterpret_s32_u16(simde_uint16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s32_u16(a); - #else - simde_int32x2_private r_; - simde_uint16x4_private a_ = simde_uint16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - 
return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s32_u16 - #define vreinterpret_s32_u16 simde_vreinterpret_s32_u16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vreinterpret_s32_u32(simde_uint32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s32_u32(a); - #else - simde_int32x2_private r_; - simde_uint32x2_private a_ = simde_uint32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s32_u32 - #define vreinterpret_s32_u32 simde_vreinterpret_s32_u32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vreinterpret_s32_u64(simde_uint64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s32_u64(a); - #else - simde_int32x2_private r_; - simde_uint64x1_private a_ = simde_uint64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s32_u64 - #define vreinterpret_s32_u64 simde_vreinterpret_s32_u64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vreinterpret_s32_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s32_f32(a); - #else - simde_int32x2_private r_; - simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s32_f32 - #define vreinterpret_s32_f32 simde_vreinterpret_s32_f32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vreinterpret_s32_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpret_s32_f64(a); - #else - simde_int32x2_private r_; - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s32_f64 - #define vreinterpret_s32_f64 simde_vreinterpret_s32_f64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vreinterpretq_s32_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s32_s8(a); - #else - simde_int32x4_private r_; - simde_int8x16_private a_ = simde_int8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s32_s8 - #define vreinterpretq_s32_s8(a) simde_vreinterpretq_s32_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vreinterpretq_s32_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s32_s16(a); - #else - simde_int32x4_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s32_s16 - #define vreinterpretq_s32_s16(a) simde_vreinterpretq_s32_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vreinterpretq_s32_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s32_s64(a); - #else - simde_int32x4_private r_; - simde_int64x2_private a_ = 
simde_int64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s32_s64 - #define vreinterpretq_s32_s64(a) simde_vreinterpretq_s32_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vreinterpretq_s32_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s32_u8(a); - #else - simde_int32x4_private r_; - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s32_u8 - #define vreinterpretq_s32_u8(a) simde_vreinterpretq_s32_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vreinterpretq_s32_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s32_u16(a); - #else - simde_int32x4_private r_; - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s32_u16 - #define vreinterpretq_s32_u16(a) simde_vreinterpretq_s32_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vreinterpretq_s32_u32(simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s32_u32(a); - #else - simde_int32x4_private r_; - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s32_u32 - #define vreinterpretq_s32_u32(a) simde_vreinterpretq_s32_u32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vreinterpretq_s32_u64(simde_uint64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s32_u64(a); - #else - simde_int32x4_private r_; - simde_uint64x2_private a_ = simde_uint64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s32_u64 - #define vreinterpretq_s32_u64(a) simde_vreinterpretq_s32_u64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vreinterpretq_s32_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s32_f32(a); - #else - simde_int32x4_private r_; - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s32_f32 - #define vreinterpretq_s32_f32(a) simde_vreinterpretq_s32_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vreinterpretq_s32_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_s32_f64(a); - #else - simde_int32x4_private r_; - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s32_f64 - #define vreinterpretq_s32_f64(a) simde_vreinterpretq_s32_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vreinterpret_s64_s8(simde_int8x8_t a) { - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s64_s8(a); - #else - simde_int64x1_private r_; - simde_int8x8_private a_ = simde_int8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s64_s8 - #define vreinterpret_s64_s8 simde_vreinterpret_s64_s8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vreinterpret_s64_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s64_s16(a); - #else - simde_int64x1_private r_; - simde_int16x4_private a_ = simde_int16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s64_s16 - #define vreinterpret_s64_s16 simde_vreinterpret_s64_s16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vreinterpret_s64_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s64_s32(a); - #else - simde_int64x1_private r_; - simde_int32x2_private a_ = simde_int32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s64_s32 - #define vreinterpret_s64_s32 simde_vreinterpret_s64_s32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vreinterpret_s64_u8(simde_uint8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s64_u8(a); - #else - simde_int64x1_private r_; - simde_uint8x8_private a_ = simde_uint8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s64_u8 - #define vreinterpret_s64_u8 simde_vreinterpret_s64_u8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vreinterpret_s64_u16(simde_uint16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s64_u16(a); - #else - simde_int64x1_private r_; - simde_uint16x4_private a_ = simde_uint16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s64_u16 - #define vreinterpret_s64_u16 simde_vreinterpret_s64_u16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vreinterpret_s64_u32(simde_uint32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s64_u32(a); - #else - simde_int64x1_private r_; - simde_uint32x2_private a_ = simde_uint32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s64_u32 - #define vreinterpret_s64_u32 simde_vreinterpret_s64_u32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vreinterpret_s64_u64(simde_uint64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s64_u64(a); - #else - simde_int64x1_private r_; - simde_uint64x1_private a_ = simde_uint64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s64_u64 - #define vreinterpret_s64_u64 simde_vreinterpret_s64_u64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t 
-simde_vreinterpret_s64_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s64_f32(a); - #else - simde_int64x1_private r_; - simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s64_f32 - #define vreinterpret_s64_f32 simde_vreinterpret_s64_f32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vreinterpret_s64_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpret_s64_f64(a); - #else - simde_int64x1_private r_; - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s64_f64 - #define vreinterpret_s64_f64 simde_vreinterpret_s64_f64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vreinterpretq_s64_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s64_s8(a); - #else - simde_int64x2_private r_; - simde_int8x16_private a_ = simde_int8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s64_s8 - #define vreinterpretq_s64_s8(a) simde_vreinterpretq_s64_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vreinterpretq_s64_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s64_s16(a); - #else - simde_int64x2_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s64_s16 - #define vreinterpretq_s64_s16(a) simde_vreinterpretq_s64_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vreinterpretq_s64_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s64_s32(a); - #else - simde_int64x2_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s64_s32 - #define vreinterpretq_s64_s32(a) simde_vreinterpretq_s64_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vreinterpretq_s64_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s64_u8(a); - #else - simde_int64x2_private r_; - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s64_u8 - #define vreinterpretq_s64_u8(a) simde_vreinterpretq_s64_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vreinterpretq_s64_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s64_u16(a); - #else - simde_int64x2_private r_; - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s64_u16 - #define vreinterpretq_s64_u16(a) 
simde_vreinterpretq_s64_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vreinterpretq_s64_u32(simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s64_u32(a); - #else - simde_int64x2_private r_; - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s64_u32 - #define vreinterpretq_s64_u32(a) simde_vreinterpretq_s64_u32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vreinterpretq_s64_u64(simde_uint64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s64_u64(a); - #else - simde_int64x2_private r_; - simde_uint64x2_private a_ = simde_uint64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s64_u64 - #define vreinterpretq_s64_u64(a) simde_vreinterpretq_s64_u64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vreinterpretq_s64_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s64_f32(a); - #else - simde_int64x2_private r_; - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s64_f32 - #define vreinterpretq_s64_f32(a) simde_vreinterpretq_s64_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vreinterpretq_s64_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_s64_f64(a); - #else - simde_int64x2_private r_; - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s64_f64 - #define vreinterpretq_s64_f64(a) simde_vreinterpretq_s64_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vreinterpret_u8_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_u8_s8(a); - #else - simde_uint8x8_private r_; - simde_int8x8_private a_ = simde_int8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_u8_s8 - #define vreinterpret_u8_s8 simde_vreinterpret_u8_s8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vreinterpret_u8_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_u8_s16(a); - #else - simde_uint8x8_private r_; - simde_int16x4_private a_ = simde_int16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_u8_s16 - #define vreinterpret_u8_s16 simde_vreinterpret_u8_s16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vreinterpret_u8_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_u8_s32(a); - #else - simde_uint8x8_private r_; - simde_int32x2_private a_ = simde_int32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) 
-  #undef vreinterpret_u8_s32
-  #define vreinterpret_u8_s32 simde_vreinterpret_u8_s32
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde_uint8x8_t
-simde_vreinterpret_u8_s64(simde_int64x1_t a) {
-  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
-    return vreinterpret_u8_s64(a);
-  #else
-    simde_uint8x8_private r_;
-    simde_int64x1_private a_ = simde_int64x1_to_private(a);
-    simde_memcpy(&r_, &a_, sizeof(r_));
-    return simde_uint8x8_from_private(r_);
-  #endif
-}
-#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
-  #undef vreinterpret_u8_s64
-  #define vreinterpret_u8_s64 simde_vreinterpret_u8_s64
-#endif

[... remaining deleted lines of this vendored SIMDE header elided: the rest of
the simde_vreinterpret_* and simde_vreinterpretq_* shims, covering every
pairing of the u8/u16/u32/u64, s8/s16/s32/s64, f16, f32 and f64 lane types.
Each deleted shim has the same shape as the one shown above: return the native
vreinterpret*_X_Y intrinsic when SIMDE_ARM_NEON_A32V7_NATIVE is defined
(SIMDE_ARM_NEON_A64V8_NATIVE for f64, with SIMDE_ARM_NEON_FP16 additionally
required for f16), otherwise simde_memcpy between the corresponding *_private
representations. Each is followed by its *_ENABLE_NATIVE_ALIASES
#undef/#define alias guard. The deletion continues below with: ...]

-SIMDE_FUNCTION_ATTRIBUTES
-simde_int64x1_t
-simde_vreinterpret_s64_f16(simde_float16x4_t a) {
-  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
-    return vreinterpret_s64_f16(a);
-  #else
simde_int64x1_private r_; - simde_float16x4_private a_ = simde_float16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s64_f16 - #define vreinterpret_s64_f16(a) simde_vreinterpret_s64_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vreinterpretq_s64_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vreinterpretq_s64_f16(a); - #else - simde_int64x2_private r_; - simde_float16x8_private a_ = simde_float16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s64_f16 - #define vreinterpretq_s64_f16(a) simde_vreinterpretq_s64_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vreinterpret_u32_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vreinterpret_u32_f16(a); - #else - simde_uint32x2_private r_; - simde_float16x4_private a_ = simde_float16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_u32_f16 - #define vreinterpret_u32_f16(a) simde_vreinterpret_u32_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vreinterpretq_u32_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vreinterpretq_u32_f16(a); - #else - simde_uint32x4_private r_; - simde_float16x8_private a_ = simde_float16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_u32_f16 - #define vreinterpretq_u32_f16(a) simde_vreinterpretq_u32_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vreinterpretq_u64_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vreinterpretq_u64_f16(a); - #else - simde_uint64x2_private r_; - simde_float16x8_private a_ = simde_float16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_u64_f16 - #define vreinterpretq_u64_f16 simde_vreinterpretq_u64_f16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vreinterpret_p8_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_p8_s8(a); - #else - simde_poly8x8_private r_; - simde_int8x8_private a_ = simde_int8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p8_s8 - #define vreinterpret_p8_s8 simde_vreinterpret_p8_s8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vreinterpret_p8_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_p8_s16(a); - #else - simde_poly8x8_private r_; - simde_int16x4_private a_ = simde_int16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p8_s16 - #define vreinterpret_p8_s16 
simde_vreinterpret_p8_s16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vreinterpret_p8_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_p8_s32(a); - #else - simde_poly8x8_private r_; - simde_int32x2_private a_ = simde_int32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p8_s32 - #define vreinterpret_p8_s32 simde_vreinterpret_p8_s32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vreinterpret_p8_s64(simde_int64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_p8_s64(a); - #else - simde_poly8x8_private r_; - simde_int64x1_private a_ = simde_int64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p8_s64 - #define vreinterpret_p8_s64 simde_vreinterpret_p8_s64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vreinterpret_p8_p16(simde_poly16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_p8_p16(a); - #else - simde_poly8x8_private r_; - simde_poly16x4_private a_ = simde_poly16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p8_p16 - #define vreinterpret_p8_p16 simde_vreinterpret_p8_p16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vreinterpret_p8_p64(simde_poly64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpret_p8_p64(a); - #else - simde_poly8x8_private r_; - simde_poly64x1_private a_ = simde_poly64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p8_p64 - #define vreinterpret_p8_p64 simde_vreinterpret_p8_p64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vreinterpret_p8_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_p8_f32(a); - #else - simde_poly8x8_private r_; - simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p8_f32 - #define vreinterpret_p8_f32 simde_vreinterpret_p8_f32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vreinterpret_p8_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpret_p8_f64(a); - #else - simde_poly8x8_private r_; - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p8_f64 - #define vreinterpret_p8_f64 simde_vreinterpret_p8_f64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vreinterpretq_p8_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_p8_s8(a); - #else - simde_poly8x16_private r_; - simde_int8x16_private a_ = simde_int8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p8_s8 - #define 
vreinterpretq_p8_s8(a) simde_vreinterpretq_p8_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vreinterpretq_p8_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_p8_s16(a); - #else - simde_poly8x16_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p8_s16 - #define vreinterpretq_p8_s16(a) simde_vreinterpretq_p8_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vreinterpretq_p8_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_p8_s32(a); - #else - simde_poly8x16_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p8_s32 - #define vreinterpretq_p8_s32(a) simde_vreinterpretq_p8_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vreinterpretq_p8_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_p8_s64(a); - #else - simde_poly8x16_private r_; - simde_int64x2_private a_ = simde_int64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p8_s64 - #define vreinterpretq_p8_s64(a) simde_vreinterpretq_p8_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vreinterpretq_p8_p16(simde_poly16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_p8_p16(a); - #else - simde_poly8x16_private r_; - simde_poly16x8_private a_ = simde_poly16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p8_p16 - #define vreinterpretq_p8_p16(a) simde_vreinterpretq_p8_p16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vreinterpretq_p8_p64(simde_poly64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpretq_p8_p64(a); - #else - simde_poly8x16_private r_; - simde_poly64x2_private a_ = simde_poly64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p8_p64 - #define vreinterpretq_p8_p64(a) simde_vreinterpretq_p8_p64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vreinterpretq_p8_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_p8_f32(a); - #else - simde_poly8x16_private r_; - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p8_f32 - #define vreinterpretq_p8_f32(a) simde_vreinterpretq_p8_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vreinterpretq_p8_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_p8_f64(a); - #else - simde_poly8x16_private r_; - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x16_from_private(r_); - #endif -} 
-#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p8_f64 - #define vreinterpretq_p8_f64(a) simde_vreinterpretq_p8_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vreinterpret_p16_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_p16_s8(a); - #else - simde_poly16x4_private r_; - simde_int8x8_private a_ = simde_int8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p16_s8 - #define vreinterpret_p16_s8 simde_vreinterpret_p16_s8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vreinterpret_p16_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_p16_s16(a); - #else - simde_poly16x4_private r_; - simde_int16x4_private a_ = simde_int16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p16_s16 - #define vreinterpret_p16_s16 simde_vreinterpret_p16_s16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vreinterpret_p16_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_p16_s32(a); - #else - simde_poly16x4_private r_; - simde_int32x2_private a_ = simde_int32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p16_s32 - #define vreinterpret_p16_s32 simde_vreinterpret_p16_s32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vreinterpret_p16_s64(simde_int64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_p16_s64(a); - #else - simde_poly16x4_private r_; - simde_int64x1_private a_ = simde_int64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p16_s64 - #define vreinterpret_p16_s64 simde_vreinterpret_p16_s64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vreinterpret_p16_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_p16_p8(a); - #else - simde_poly16x4_private r_; - simde_poly8x8_private a_ = simde_poly8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p16_p8 - #define vreinterpret_p16_p8 simde_vreinterpret_p16_p8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vreinterpret_p16_p64(simde_poly64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpret_p16_p64(a); - #else - simde_poly16x4_private r_; - simde_poly64x1_private a_ = simde_poly64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p16_p64 - #define vreinterpret_p16_p64 simde_vreinterpret_p16_p64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vreinterpret_p16_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vreinterpret_p16_f16(a); - #else - simde_poly16x4_private r_; - simde_float16x4_private a_ = simde_float16x4_to_private(a); - simde_memcpy(&r_, 
&a_, sizeof(r_)); - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p16_f16 - #define vreinterpret_p16_f16(a) simde_vreinterpret_p16_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vreinterpret_p16_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_p16_f32(a); - #else - simde_poly16x4_private r_; - simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p16_f32 - #define vreinterpret_p16_f32 simde_vreinterpret_p16_f32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vreinterpret_p16_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpret_p16_f64(a); - #else - simde_poly16x4_private r_; - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p16_f64 - #define vreinterpret_p16_f64 simde_vreinterpret_p16_f64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vreinterpretq_p16_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_p16_s8(a); - #else - simde_poly16x8_private r_; - simde_int8x16_private a_ = simde_int8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p16_s8 - #define vreinterpretq_p16_s8(a) simde_vreinterpretq_p16_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vreinterpretq_p16_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_p16_s16(a); - #else - simde_poly16x8_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p16_s16 - #define vreinterpretq_p16_s16(a) simde_vreinterpretq_p16_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vreinterpretq_p16_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_p16_s32(a); - #else - simde_poly16x8_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p16_s32 - #define vreinterpretq_p16_s32(a) simde_vreinterpretq_p16_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vreinterpretq_p16_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_p16_s64(a); - #else - simde_poly16x8_private r_; - simde_int64x2_private a_ = simde_int64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p16_s64 - #define vreinterpretq_p16_s64(a) simde_vreinterpretq_p16_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vreinterpretq_p16_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_p16_p8(a); - #else - 
simde_poly16x8_private r_; - simde_poly8x16_private a_ = simde_poly8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p16_p8 - #define vreinterpretq_p16_p8(a) simde_vreinterpretq_p16_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vreinterpretq_p16_p64(simde_poly64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpretq_p16_p64(a); - #else - simde_poly16x8_private r_; - simde_poly64x2_private a_ = simde_poly64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p16_p64 - #define vreinterpretq_p16_p64(a) simde_vreinterpretq_p16_p64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vreinterpretq_p16_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_p16_f32(a); - #else - simde_poly16x8_private r_; - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p16_f32 - #define vreinterpretq_p16_f32(a) simde_vreinterpretq_p16_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vreinterpretq_p16_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_p16_f64(a); - #else - simde_poly16x8_private r_; - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p16_f64 - #define vreinterpretq_p16_f64(a) simde_vreinterpretq_p16_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vreinterpretq_p16_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vreinterpretq_p16_f16(a); - #else - simde_poly16x8_private r_; - simde_float16x8_private a_ = simde_float16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p16_f16 - #define vreinterpretq_p16_f16(a) simde_vreinterpretq_p16_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t -simde_vreinterpret_p64_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpret_p64_s8(a); - #else - simde_poly64x1_private r_; - simde_int8x8_private a_ = simde_int8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p64_s8 - #define vreinterpret_p64_s8 simde_vreinterpret_p64_s8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t -simde_vreinterpret_p64_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpret_p64_s16(a); - #else - simde_poly64x1_private r_; - simde_int16x4_private a_ = simde_int16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p64_s16 - #define vreinterpret_p64_s16 simde_vreinterpret_p64_s16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t 
-simde_vreinterpret_p64_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpret_p64_s32(a); - #else - simde_poly64x1_private r_; - simde_int32x2_private a_ = simde_int32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p64_s32 - #define vreinterpret_p64_s32 simde_vreinterpret_p64_s32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t -simde_vreinterpret_p64_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpret_p64_p8(a); - #else - simde_poly64x1_private r_; - simde_poly8x8_private a_ = simde_poly8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p64_p8 - #define vreinterpret_p64_p8 simde_vreinterpret_p64_p8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t -simde_vreinterpret_p64_p16(simde_poly16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpret_p64_p16(a); - #else - simde_poly64x1_private r_; - simde_poly16x4_private a_ = simde_poly16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p64_p16 - #define vreinterpret_p64_p16 simde_vreinterpret_p64_p16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t -simde_vreinterpret_p64_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vreinterpret_p64_f16(a); - #else - simde_poly64x1_private r_; - simde_float16x4_private a_ = simde_float16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p64_f16 - #define vreinterpret_p64_f16 simde_vreinterpret_p64_f16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t -simde_vreinterpret_p64_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpret_p64_f32(a); - #else - simde_poly64x1_private r_; - simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p64_f32 - #define vreinterpret_p64_f32 simde_vreinterpret_p64_f32 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t -simde_vreinterpret_p64_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpret_p64_f64(a); - #else - simde_poly64x1_private r_; - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p64_f64 - #define vreinterpret_p64_f64 simde_vreinterpret_p64_f64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vreinterpretq_p64_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpretq_p64_s8(a); - #else - simde_poly64x2_private r_; - simde_int8x16_private a_ = simde_int8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p64_s8 - #define 
vreinterpretq_p64_s8(a) simde_vreinterpretq_p64_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vreinterpretq_p64_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpretq_p64_s16(a); - #else - simde_poly64x2_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p64_s16 - #define vreinterpretq_p64_s16(a) simde_vreinterpretq_p64_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vreinterpretq_p64_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpretq_p64_s32(a); - #else - simde_poly64x2_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p64_s32 - #define vreinterpretq_p64_s32(a) simde_vreinterpretq_p64_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vreinterpretq_p64_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpretq_p64_s64(a); - #else - simde_poly64x2_private r_; - simde_int64x2_private a_ = simde_int64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p64_s64 - #define vreinterpretq_p64_s64(a) simde_vreinterpretq_p64_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vreinterpretq_p64_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpretq_p64_p8(a); - #else - simde_poly64x2_private r_; - simde_poly8x16_private a_ = simde_poly8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p64_p8 - #define vreinterpretq_p64_p8(a) simde_vreinterpretq_p64_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vreinterpretq_p64_p16(simde_poly16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpretq_p64_p16(a); - #else - simde_poly64x2_private r_; - simde_poly16x8_private a_ = simde_poly16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p64_p16 - #define vreinterpretq_p64_p16(a) simde_vreinterpretq_p64_p16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vreinterpretq_p64_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpretq_p64_f32(a); - #else - simde_poly64x2_private r_; - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p64_f32 - #define vreinterpretq_p64_f32(a) simde_vreinterpretq_p64_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vreinterpretq_p64_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_p64_f64(a); - #else - simde_poly64x2_private r_; - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return 
simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p64_f64 - #define vreinterpretq_p64_f64(a) simde_vreinterpretq_p64_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vreinterpret_p8_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vreinterpret_p8_f16(a); - #else - simde_poly8x8_private r_; - simde_float16x4_private a_ = simde_float16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_p8_f16 - #define vreinterpret_p8_f16(a) simde_vreinterpret_p8_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vreinterpretq_p8_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vreinterpretq_p8_f16(a); - #else - simde_poly8x16_private r_; - simde_float16x8_private a_ = simde_float16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p8_f16 - #define vreinterpretq_p8_f16(a) simde_vreinterpretq_p8_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vreinterpretq_p64_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vreinterpretq_p64_f16(a); - #else - simde_poly64x2_private r_; - simde_float16x8_private a_ = simde_float16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_p64_f16 - #define vreinterpretq_p64_f16 simde_vreinterpretq_p64_f16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vreinterpret_s8_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s8_p8(a); - #else - simde_int8x8_private r_; - simde_poly8x8_private a_ = simde_poly8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s8_p8 - #define vreinterpret_s8_p8 simde_vreinterpret_s8_p8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vreinterpret_s8_p16(simde_poly16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s8_p16(a); - #else - simde_int8x8_private r_; - simde_poly16x4_private a_ = simde_poly16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s8_p16 - #define vreinterpret_s8_p16 simde_vreinterpret_s8_p16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vreinterpret_s8_p64(simde_poly64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpret_s8_p64(a); - #else - simde_int8x8_private r_; - simde_poly64x1_private a_ = simde_poly64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s8_p64 - #define vreinterpret_s8_p64 simde_vreinterpret_s8_p64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vreinterpretq_s8_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s8_p8(a); - #else 
- simde_int8x16_private r_; - simde_poly8x16_private a_ = simde_poly8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s8_p8 - #define vreinterpretq_s8_p8(a) simde_vreinterpretq_s8_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vreinterpretq_s8_p16(simde_poly16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s8_p16(a); - #else - simde_int8x16_private r_; - simde_poly16x8_private a_ = simde_poly16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s8_p16 - #define vreinterpretq_s8_p16(a) simde_vreinterpretq_s8_p16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vreinterpretq_s8_p64(simde_poly64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpretq_s8_p64(a); - #else - simde_int8x16_private r_; - simde_poly64x2_private a_ = simde_poly64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s8_p64 - #define vreinterpretq_s8_p64(a) simde_vreinterpretq_s8_p64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vreinterpret_s16_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s16_p8(a); - #else - simde_int16x4_private r_; - simde_poly8x8_private a_ = simde_poly8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s16_p8 - #define vreinterpret_s16_p8 simde_vreinterpret_s16_p8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vreinterpret_s16_p16(simde_poly16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s16_p16(a); - #else - simde_int16x4_private r_; - simde_poly16x4_private a_ = simde_poly16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s16_p16 - #define vreinterpret_s16_p16 simde_vreinterpret_s16_p16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vreinterpret_s16_p64(simde_poly64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpret_s16_p64(a); - #else - simde_int16x4_private r_; - simde_poly64x1_private a_ = simde_poly64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s16_p64 - #define vreinterpret_s16_p64 simde_vreinterpret_s16_p64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vreinterpretq_s16_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s16_p8(a); - #else - simde_int16x8_private r_; - simde_poly8x16_private a_ = simde_poly8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s16_p8 - #define vreinterpretq_s16_p8(a) simde_vreinterpretq_s16_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vreinterpretq_s16_p16(simde_poly16x8_t a) { - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s16_p16(a); - #else - simde_int16x8_private r_; - simde_poly16x8_private a_ = simde_poly16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s16_p16 - #define vreinterpretq_s16_p16(a) simde_vreinterpretq_s16_p16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vreinterpretq_s16_p64(simde_poly64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpretq_s16_p64(a); - #else - simde_int16x8_private r_; - simde_poly64x2_private a_ = simde_poly64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s16_p64 - #define vreinterpretq_s16_p64(a) simde_vreinterpretq_s16_p64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vreinterpret_s32_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s32_p8(a); - #else - simde_int32x2_private r_; - simde_poly8x8_private a_ = simde_poly8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s32_p8 - #define vreinterpret_s32_p8 simde_vreinterpret_s32_p8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vreinterpret_s32_p16(simde_poly16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s32_p16(a); - #else - simde_int32x2_private r_; - simde_poly16x4_private a_ = simde_poly16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s32_p16 - #define vreinterpret_s32_p16 simde_vreinterpret_s32_p16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vreinterpret_s32_p64(simde_poly64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpret_s32_p64(a); - #else - simde_int32x2_private r_; - simde_poly64x1_private a_ = simde_poly64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s32_p64 - #define vreinterpret_s32_p64 simde_vreinterpret_s32_p64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vreinterpretq_s32_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s32_p8(a); - #else - simde_int32x4_private r_; - simde_poly8x16_private a_ = simde_poly8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s32_p8 - #define vreinterpretq_s32_p8(a) simde_vreinterpretq_s32_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vreinterpretq_s32_p16(simde_poly16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s32_p16(a); - #else - simde_int32x4_private r_; - simde_poly16x8_private a_ = simde_poly16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s32_p16 - #define vreinterpretq_s32_p16(a) simde_vreinterpretq_s32_p16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde_int32x4_t -simde_vreinterpretq_s32_p64(simde_poly64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpretq_s32_p64(a); - #else - simde_int32x4_private r_; - simde_poly64x2_private a_ = simde_poly64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s32_p64 - #define vreinterpretq_s32_p64(a) simde_vreinterpretq_s32_p64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vreinterpret_s64_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s64_p8(a); - #else - simde_int64x1_private r_; - simde_poly8x8_private a_ = simde_poly8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s64_p8 - #define vreinterpret_s64_p8 simde_vreinterpret_s64_p8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vreinterpret_s64_p16(simde_poly16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_s64_p16(a); - #else - simde_int64x1_private r_; - simde_poly16x4_private a_ = simde_poly16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s64_p16 - #define vreinterpret_s64_p16 simde_vreinterpret_s64_p16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vreinterpret_s64_p64(simde_poly64x1_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpret_s64_p64(a); - #else - simde_int64x1_private r_; - simde_poly64x1_private a_ = simde_poly64x1_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_s64_p64 - #define vreinterpret_s64_p64 simde_vreinterpret_s64_p64 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vreinterpretq_s64_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s64_p8(a); - #else - simde_int64x2_private r_; - simde_poly8x16_private a_ = simde_poly8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s64_p8 - #define vreinterpretq_s64_p8(a) simde_vreinterpretq_s64_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vreinterpretq_s64_p16(simde_poly16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_s64_p16(a); - #else - simde_int64x2_private r_; - simde_poly16x8_private a_ = simde_poly16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s64_p16 - #define vreinterpretq_s64_p16(a) simde_vreinterpretq_s64_p16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vreinterpretq_s64_p64(simde_poly64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vreinterpretq_s64_p64(a); - #else - simde_int64x2_private r_; - simde_poly64x2_private a_ = simde_poly64x2_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_s64_p64 - #define vreinterpretq_s64_p64(a) 
simde_vreinterpretq_s64_p64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vreinterpret_f32_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_f32_p8(a); - #else - simde_float32x2_private r_; - simde_poly8x8_private a_ = simde_poly8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_f32_p8 - #define vreinterpret_f32_p8 simde_vreinterpret_f32_p8 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vreinterpret_f32_p16(simde_poly16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpret_f32_p16(a); - #else - simde_float32x2_private r_; - simde_poly16x4_private a_ = simde_poly16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_f32_p16 - #define vreinterpret_f32_p16 simde_vreinterpret_f32_p16 -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vreinterpret_f16_p16(simde_poly16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vreinterpret_f16_p16(a); - #else - simde_float16x4_private r_; - simde_poly16x4_private a_ = simde_poly16x4_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpret_f16_p16 - #define vreinterpret_f16_p16(a) simde_vreinterpret_f16_p16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vreinterpretq_f32_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_f32_p8(a); - #else - simde_float32x4_private r_; - simde_poly8x16_private a_ = simde_poly8x16_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_f32_p8 - #define vreinterpretq_f32_p8(a) simde_vreinterpretq_f32_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vreinterpretq_f32_p16(simde_poly16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_f32_p16(a); - #else - simde_float32x4_private r_; - simde_poly16x8_private a_ = simde_poly16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_f32_p16 - #define vreinterpretq_f32_p16(a) simde_vreinterpretq_f32_p16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vreinterpretq_f16_p16(simde_poly16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vreinterpretq_f16_p16(a); - #else - simde_float16x8_private r_; - simde_poly16x8_private a_ = simde_poly16x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vreinterpretq_f16_p16 - #define vreinterpretq_f16_p16(a) simde_vreinterpretq_f16_p16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vreinterpret_f64_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpret_f64_p8(a); - #else - simde_float64x1_private r_; - simde_poly8x8_private a_ = simde_poly8x8_to_private(a); - simde_memcpy(&r_, &a_, sizeof(r_)); - 
[... remainder of the deleted vendored SIMDe sources omitted for brevity: the rest of the NEON vreinterpret/vreinterpretq polyfills (float64/float16/bfloat16 and poly8/poly16/poly64/poly128 vectors reinterpreted to and from the signed, unsigned, and floating-point vector types, each returning the native intrinsic under its architecture/feature guard and otherwise bit-casting through simde_memcpy on the private representations), their SIMDE_*_ENABLE_NATIVE_ALIASES #undef/#define redirects, the closing of simde/arm/neon/reinterpret.h, and the opening of the float16 helpers (simde_vabdh_f16, which falls back to promoting both operands to float32) ...]
simde_float32_t r_ = a_ - b_; - return r_ < 0 ? simde_float16_from_float32(-r_) : simde_float16_from_float32(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vabdh_f16 - #define vabdh_f16(a, b) simde_vabdh_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vabds_f32(simde_float32_t a, simde_float32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vabds_f32(a, b); - #else - simde_float32_t r = a - b; - return r < 0 ? -r : r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vabds_f32 - #define vabds_f32(a, b) simde_vabds_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vabdd_f64(simde_float64_t a, simde_float64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vabdd_f64(a, b); - #else - simde_float64_t r = a - b; - return r < 0 ? -r : r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vabdd_f64 - #define vabdd_f64(a, b) simde_vabdd_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vabd_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vabd_f16(a, b); - #else - return simde_vabs_f16(simde_vsub_f16(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vabd_f16 - #define vabd_f16(a, b) simde_vabd_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vabd_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabd_f32(a, b); - #else - return simde_vabs_f32(simde_vsub_f32(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabd_f32 - #define vabd_f32(a, b) simde_vabd_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vabd_f64(simde_float64x1_t a, simde_float64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vabd_f64(a, b); - #else - return simde_vabs_f64(simde_vsub_f64(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vabd_f64 - #define vabd_f64(a, b) simde_vabd_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vabd_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabd_s8(a, b); - #elif defined(SIMDE_X86_MMX_NATIVE) - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - const __m64 m = _mm_cmpgt_pi8(b_.m64, a_.m64); - r_.m64 = - _mm_xor_si64( - _mm_add_pi8( - _mm_sub_pi8(a_.m64, b_.m64), - m - ), - m - ); - - return simde_int8x8_from_private(r_); - #else - return simde_vmovn_s16(simde_vabsq_s16(simde_vsubl_s8(a, b))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabd_s8 - #define vabd_s8(a, b) simde_vabd_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vabd_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabd_s16(a, b); - #elif defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - r_.m64 = _mm_sub_pi16(_mm_max_pi16(a_.m64, b_.m64), _mm_min_pi16(a_.m64, b_.m64)); - - return simde_int16x4_from_private(r_); - #else - return simde_vmovn_s32(simde_vabsq_s32(simde_vsubl_s16(a, b))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabd_s16 - #define vabd_s16(a, b) 
simde_vabd_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vabd_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabd_s32(a, b); - #else - return simde_vmovn_s64(simde_vabsq_s64(simde_vsubl_s32(a, b))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabd_s32 - #define vabd_s32(a, b) simde_vabd_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vabd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabd_u8(a, b); - #else - return simde_vmovn_u16( - simde_vreinterpretq_u16_s16( - simde_vabsq_s16( - simde_vsubq_s16( - simde_vreinterpretq_s16_u16(simde_vmovl_u8(a)), - simde_vreinterpretq_s16_u16(simde_vmovl_u8(b)))))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabd_u8 - #define vabd_u8(a, b) simde_vabd_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vabd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabd_u16(a, b); - #else - return simde_vmovn_u32( - simde_vreinterpretq_u32_s32( - simde_vabsq_s32( - simde_vsubq_s32( - simde_vreinterpretq_s32_u32(simde_vmovl_u16(a)), - simde_vreinterpretq_s32_u32(simde_vmovl_u16(b)))))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabd_u16 - #define vabd_u16(a, b) simde_vabd_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vabd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabd_u32(a, b); - #else - return simde_vmovn_u64( - simde_vreinterpretq_u64_s64( - simde_vabsq_s64( - simde_vsubq_s64( - simde_vreinterpretq_s64_u64(simde_vmovl_u32(a)), - simde_vreinterpretq_s64_u64(simde_vmovl_u32(b)))))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabd_u32 - #define vabd_u32(a, b) simde_vabd_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vabdq_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vabdq_f16(a, b); - #else - return simde_vabsq_f16(simde_vsubq_f16(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vabdq_f16 - #define vabdq_f16(a, b) simde_vabdq_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vabdq_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabdq_f32(a, b); - #else - return simde_vabsq_f32(simde_vsubq_f32(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabdq_f32 - #define vabdq_f32(a, b) simde_vabdq_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vabdq_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vabdq_f64(a, b); - #else - return simde_vabsq_f64(simde_vsubq_f64(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vabdq_f64 - #define vabdq_f64(a, b) simde_vabdq_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vabdq_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabdq_s8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sub(vec_max(a, b), vec_min(a, b)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b) - vec_min(a, b); - #else - 
simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = _mm_sub_epi8(_mm_max_epi8(a_.m128i, b_.m128i), _mm_min_epi8(a_.m128i, b_.m128i)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i m = _mm_cmpgt_epi8(b_.m128i, a_.m128i); - r_.m128i = - _mm_xor_si128( - _mm_add_epi8( - _mm_sub_epi8(a_.m128i, b_.m128i), - m - ), - m - ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_sub(wasm_i8x16_max(a_.v128, b_.v128), wasm_i8x16_min(a_.v128, b_.v128)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - int16_t tmp = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(int8_t, tmp < 0 ? -tmp : tmp); - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabdq_s8 - #define vabdq_s8(a, b) simde_vabdq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vabdq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabdq_s16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sub(vec_max(a, b), vec_min(a, b)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b) - vec_min(a, b); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881658604 */ - r_.m128i = _mm_sub_epi16(_mm_max_epi16(a_.m128i, b_.m128i), _mm_min_epi16(a_.m128i, b_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_sub(wasm_i16x8_max(a_.v128, b_.v128), wasm_i16x8_min(a_.v128, b_.v128)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = - (a_.values[i] < b_.values[i]) ? - (b_.values[i] - a_.values[i]) : - (a_.values[i] - b_.values[i]); - } - - #endif - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabdq_s16 - #define vabdq_s16(a, b) simde_vabdq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vabdq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabdq_s32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sub(vec_max(a, b), vec_min(a, b)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b) - vec_min(a, b); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = _mm_sub_epi32(_mm_max_epi32(a_.m128i, b_.m128i), _mm_min_epi32(a_.m128i, b_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_sub(wasm_i32x4_max(a_.v128, b_.v128), wasm_i32x4_min(a_.v128, b_.v128)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i m = _mm_cmpgt_epi32(b_.m128i, a_.m128i); - r_.m128i = - _mm_xor_si128( - _mm_add_epi32( - _mm_sub_epi32(a_.m128i, b_.m128i), - m - ), - m - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(int32_t, tmp < 0 ? 
-tmp : tmp); - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabdq_s32 - #define vabdq_s32(a, b) simde_vabdq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vabdq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabdq_u8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) - return vec_absd(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sub(vec_max(a, b), vec_min(a, b)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b) - vec_min(a, b); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_sub_epi8(_mm_max_epu8(a_.m128i, b_.m128i), _mm_min_epu8(a_.m128i, b_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_sub(wasm_u8x16_max(a_.v128, b_.v128), wasm_u8x16_min(a_.v128, b_.v128)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - int16_t tmp = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, tmp < 0 ? -tmp : tmp); - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabdq_u8 - #define vabdq_u8(a, b) simde_vabdq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vabdq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabdq_u16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) - return vec_absd(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sub(vec_max(a, b), vec_min(a, b)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b) - vec_min(a, b); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_X86_SSE4_2_NATIVE) - r_.m128i = _mm_sub_epi16(_mm_max_epu16(a_.m128i, b_.m128i), _mm_min_epu16(a_.m128i, b_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_sub(wasm_u16x8_max(a_.v128, b_.v128), wasm_u16x8_min(a_.v128, b_.v128)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) - HEDLEY_STATIC_CAST(int32_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, tmp < 0 ? 
-tmp : tmp); - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabdq_u16 - #define vabdq_u16(a, b) simde_vabdq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vabdq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabdq_u32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) - return vec_absd(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sub(vec_max(a, b), vec_min(a, b)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b) - vec_min(a, b); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_X86_SSE4_2_NATIVE) - r_.m128i = _mm_sub_epi32(_mm_max_epu32(a_.m128i, b_.m128i), _mm_min_epu32(a_.m128i, b_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_sub(wasm_u32x4_max(a_.v128, b_.v128), wasm_u32x4_min(a_.v128, b_.v128)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, tmp < 0 ? -tmp : tmp); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabdq_u32 - #define vabdq_u32(a, b) simde_vabdq_u32((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ABD_H) */ -/* :: End simde/arm/neon/abd.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/add.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ADD_H) -#define SIMDE_ARM_NEON_ADD_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16 -simde_vaddh_f16(simde_float16_t a, simde_float16_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vaddh_f16(a, b); - #else - simde_float32 af = simde_float16_to_float32(a); - simde_float32 bf = simde_float16_to_float32(b); - return simde_float16_from_float32(af + bf); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vaddh_f16 - #define vaddh_f16(a, b) simde_vaddh_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vaddd_s64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddd_s64(a, b); - #else - return a + b; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddd_s64 - #define vaddd_s64(a, b) simde_vaddd_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vaddd_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddd_u64(a, b); - #else - return a + b; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddd_u64 - #define vaddd_u64(a, b) simde_vaddd_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vadd_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vadd_f16(a, b); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vaddh_f16(a_.values[i], b_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vadd_f16 - #define vadd_f16(a, b) simde_vadd_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vadd_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vadd_f32(a, b); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vadd_f32 - #define vadd_f32(a, b) simde_vadd_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vadd_f64(simde_float64x1_t a, simde_float64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vadd_f64(a, b); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a), - b_ = simde_float64x1_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_float64x1_from_private(r_); - #endif -} -#if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vadd_f64 - #define vadd_f64(a, b) simde_vadd_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vadd_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vadd_s8(a, b); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #elif defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_add_pi8(a_.m64, b_.m64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vadd_s8 - #define vadd_s8(a, b) simde_vadd_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vadd_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vadd_s16(a, b); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #elif defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_add_pi16(a_.m64, b_.m64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vadd_s16 - #define vadd_s16(a, b) simde_vadd_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vadd_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vadd_s32(a, b); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #elif defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_add_pi32(a_.m64, b_.m64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vadd_s32 - #define vadd_s32(a, b) simde_vadd_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vadd_s64(simde_int64x1_t a, simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vadd_s64(a, b); - #else - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vadd_s64 - #define vadd_s64(a, b) simde_vadd_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vadd_u8(a, b); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + 
b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vadd_u8 - #define vadd_u8(a, b) simde_vadd_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vadd_u16(a, b); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vadd_u16 - #define vadd_u16(a, b) simde_vadd_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vadd_u32(a, b); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vadd_u32 - #define vadd_u32(a, b) simde_vadd_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vadd_u64(simde_uint64x1_t a, simde_uint64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vadd_u64(a, b); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vadd_u64 - #define vadd_u64(a, b) simde_vadd_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vaddq_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vaddq_f16(a, b); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vaddh_f16(a_.values[i], b_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vaddq_f16 - #define vaddq_f16(a, b) simde_vaddq_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vaddq_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddq_f32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(float) a_ , b_, r_; - a_ = a; - b_ = b; - r_ = vec_add(a_, b_); - return r_; - #else - 
simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - #if defined(SIMDE_X86_SSE_NATIVE) - r_.m128 = _mm_add_ps(a_.m128, b_.m128); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_f32x4_add(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddq_f32 - #define vaddq_f32(a, b) simde_vaddq_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vaddq_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddq_f64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_add(a, b); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128d = _mm_add_pd(a_.m128d, b_.m128d); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_f64x2_add(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddq_f64 - #define vaddq_f64(a, b) simde_vaddq_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vaddq_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddq_s8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_add(a, b); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_add_epi8(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_add(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddq_s8 - #define vaddq_s8(a, b) simde_vaddq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddq_s16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_add(a, b); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_add_epi16(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_add(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddq_s16 - 
#define vaddq_s16(a, b) simde_vaddq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddq_s32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_add(a, b); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_add_epi32(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_add(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddq_s32 - #define vaddq_s32(a, b) simde_vaddq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vaddq_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddq_s64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return vec_add(a, b); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_add_epi64(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i64x2_add(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddq_s64 - #define vaddq_s64(a, b) simde_vaddq_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddq_u8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_add(a, b); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddq_u8 - #define vaddq_u8(a, b) simde_vaddq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddq_u16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_add(a, b); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddq_u16 - #define vaddq_u16(a, b) 
simde_vaddq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddq_u32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_add(a, b); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddq_u32 - #define vaddq_u32(a, b) simde_vaddq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vaddq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddq_u64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return vec_add(a, b); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values + b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddq_u64 - #define vaddq_u64(a, b) simde_vaddq_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vadd_p8(simde_poly8x8_t a, simde_poly8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(_GCC_ARM_NEON_H) - return vadd_p8(a, b); - #else - simde_poly8x8_private - r_, - a_ = simde_poly8x8_to_private(a), - b_ = simde_poly8x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = b_.values[i] ^ ((0 ^ a_.values[i]) & 0xFF); - } - - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vadd_p8 - #define vadd_p8(a, b) simde_vadd_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vadd_p16(simde_poly16x4_t a, simde_poly16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(_GCC_ARM_NEON_H) - return vadd_p16(a, b); - #else - simde_poly16x4_private - r_, - a_ = simde_poly16x4_to_private(a), - b_ = simde_poly16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = b_.values[i] ^ ((0 ^ a_.values[i]) & 0xFFFF); - } - - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vadd_p16 - #define vadd_p16(a, b) simde_vadd_p16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t -simde_vadd_p64(simde_poly64x1_t a, simde_poly64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_CRYPTO) && \ - !defined(_GCC_ARM_NEON_H) - return vadd_p64(a, b); - #else - simde_poly64x1_private - r_, - a_ = simde_poly64x1_to_private(a), - b_ = simde_poly64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = b_.values[i] ^ ((0 ^ a_.values[i]) & 0xFFFFFFFFFFFFFFFF); - } - - return simde_poly64x1_from_private(r_); - #endif -} -#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vadd_p64 - #define vadd_p64(a, b) simde_vadd_p64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vaddq_p8(simde_poly8x16_t a, simde_poly8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(_GCC_ARM_NEON_H) - return vaddq_p8(a, b); - #else - simde_poly8x16_private - r_, - a_ = simde_poly8x16_to_private(a), - b_ = simde_poly8x16_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = b_.values[i] ^ ((0 ^ a_.values[i]) & 0xFF); - } - - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddq_p8 - #define vaddq_p8(a, b) simde_vaddq_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vaddq_p16(simde_poly16x8_t a, simde_poly16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(_GCC_ARM_NEON_H) - return vaddq_p16(a, b); - #else - simde_poly16x8_private - r_, - a_ = simde_poly16x8_to_private(a), - b_ = simde_poly16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = b_.values[i] ^ ((0 ^ a_.values[i]) & 0xFFFF); - } - - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddq_p16 - #define vaddq_p16(a, b) simde_vaddq_p16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vaddq_p64(simde_poly64x2_t a, simde_poly64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_CRYPTO) && \ - !defined(_GCC_ARM_NEON_H) - return vaddq_p64(a, b); - #else - simde_poly64x2_private - r_, - a_ = simde_poly64x2_to_private(a), - b_ = simde_poly64x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = b_.values[i] ^ ((0 ^ a_.values[i]) & 0xFFFFFFFFFFFFFFFF); - } - - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vaddq_p64 - #define vaddq_p64(a, b) simde_vaddq_p64((a), (b)) -#endif - -#if !defined(SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE) -SIMDE_FUNCTION_ATTRIBUTES -simde_poly128_t -simde_vaddq_p128(simde_poly128_t a, simde_poly128_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_CRYPTO) && \ - !defined(_GCC_ARM_NEON_H) - return vaddq_p128(a, b); - #else - simde_poly128_t mask = 0xFFFFFFFFFFFFFFFFull; - mask = mask << 64; - mask = mask | 0xFFFFFFFFFFFFFFFFull; - return b ^ ((0 ^ a) & mask); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vaddq_p128 - #define vaddq_p128(a, b) simde_vaddq_p128((a), (b)) -#endif -#endif /* !defined(SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ADD_H) */ -/* :: End simde/arm/neon/add.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vaba_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaba_s8(a, b, c); - #else - return simde_vadd_s8(simde_vabd_s8(b, c), a); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaba_s8 - #define vaba_s8(a, b, c) simde_vaba_s8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vaba_s16(simde_int16x4_t a, simde_int16x4_t b, 
simde_int16x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaba_s16(a, b, c); - #else - return simde_vadd_s16(simde_vabd_s16(b, c), a); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaba_s16 - #define vaba_s16(a, b, c) simde_vaba_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vaba_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaba_s32(a, b, c); - #else - return simde_vadd_s32(simde_vabd_s32(b, c), a); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaba_s32 - #define vaba_s32(a, b, c) simde_vaba_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vaba_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaba_u8(a, b, c); - #else - return simde_vadd_u8(simde_vabd_u8(b, c), a); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaba_u8 - #define vaba_u8(a, b, c) simde_vaba_u8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vaba_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaba_u16(a, b, c); - #else - return simde_vadd_u16(simde_vabd_u16(b, c), a); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaba_u16 - #define vaba_u16(a, b, c) simde_vaba_u16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vaba_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaba_u32(a, b, c); - #else - return simde_vadd_u32(simde_vabd_u32(b, c), a); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaba_u32 - #define vaba_u32(a, b, c) simde_vaba_u32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vabaq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabaq_s8(a, b, c); - #else - return simde_vaddq_s8(simde_vabdq_s8(b, c), a); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabaq_s8 - #define vabaq_s8(a, b, c) simde_vabaq_s8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vabaq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabaq_s16(a, b, c); - #else - return simde_vaddq_s16(simde_vabdq_s16(b, c), a); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabaq_s16 - #define vabaq_s16(a, b, c) simde_vabaq_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vabaq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabaq_s32(a, b, c); - #else - return simde_vaddq_s32(simde_vabdq_s32(b, c), a); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabaq_s32 - #define vabaq_s32(a, b, c) simde_vabaq_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vabaq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vabaq_u8(a, b, c); - #else - return simde_vaddq_u8(simde_vabdq_u8(b, c), a); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vabaq_u8 - #define vabaq_u8(a, b, c) simde_vabaq_u8((a), (b), (c)) -#endif - 
[Vendored SIMDE headers deleted in full: the remainder of simde/arm/neon/aba.h, followed by the complete
 simde/arm/neon/abal.h, abdl.h, abal_high.h, abdl_high.h, addhn.h, shr_n.h, addhn_high.h, addl.h,
 addlv.h, and addv.h — MIT-licensed, auto-generated polyfills (amalgamation
 589c7d599ae2213823acc4334a3ae8ef8caefe18) providing the NEON absolute-difference, narrowing/widening
 add, shift-right, and horizontal-reduction shims. The deletion of the remaining vendored SIMDE
 headers continues below.]
r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddlv_s8 - #define vaddlv_s8(a) simde_vaddlv_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vaddlv_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddlv_s16(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddvq_s32(simde_vmovl_s16(a)); - #else - simde_int16x4_private a_ = simde_int16x4_to_private(a); - int32_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(+:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r += a_.values[i]; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddlv_s16 - #define vaddlv_s16(a) simde_vaddlv_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vaddlv_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddlv_s32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddvq_s64(simde_vmovl_s32(a)); - #else - simde_int32x2_private a_ = simde_int32x2_to_private(a); - int64_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(+:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r += a_.values[i]; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddlv_s32 - #define vaddlv_s32(a) simde_vaddlv_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vaddlv_u8(simde_uint8x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddlv_u8(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddvq_u16(simde_vmovl_u8(a)); - #else - simde_uint8x8_private a_ = simde_uint8x8_to_private(a); - uint16_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(+:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r += a_.values[i]; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddlv_u8 - #define vaddlv_u8(a) simde_vaddlv_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vaddlv_u16(simde_uint16x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddlv_u16(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddvq_u32(simde_vmovl_u16(a)); - #else - simde_uint16x4_private a_ = simde_uint16x4_to_private(a); - uint32_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(+:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r += a_.values[i]; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddlv_u16 - #define vaddlv_u16(a) simde_vaddlv_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vaddlv_u32(simde_uint32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddlv_u32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddvq_u64(simde_vmovl_u32(a)); - #else - simde_uint32x2_private a_ = simde_uint32x2_to_private(a); - uint64_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(+:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r += a_.values[i]; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddlv_u32 - #define vaddlv_u32(a) simde_vaddlv_u32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vaddlvq_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddlvq_s8(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i a_ = simde_int8x16_to_m128i(a); - a_ = _mm_xor_si128(a_, _mm_set1_epi8('\x80')); - a_ = _mm_sad_epu8(a_, _mm_setzero_si128()); - a_ = _mm_add_epi16(a_, 
_mm_shuffle_epi32(a_, 0xEE)); - return HEDLEY_STATIC_CAST(int16_t, _mm_cvtsi128_si32(a_) - 2048); - #else - simde_int8x16_private a_ = simde_int8x16_to_private(a); - int16_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(+:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r += a_.values[i]; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddlvq_s8 - #define vaddlvq_s8(a) simde_vaddlvq_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vaddlvq_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddlvq_s16(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) && !defined(HEDLEY_MSVC_VERSION) - __m128i a_ = simde_int16x8_to_m128i(a); - a_ = _mm_xor_si128(a_, _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, 0x8000))); - a_ = _mm_shuffle_epi8(a_, _mm_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0)); - a_ = _mm_sad_epu8(a_, _mm_setzero_si128()); - a_ = _mm_add_epi32(a_, _mm_srli_si128(a_, 7)); - return _mm_cvtsi128_si32(a_) - 262144; - #else - simde_int16x8_private a_ = simde_int16x8_to_private(a); - int32_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(+:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r += a_.values[i]; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddlvq_s16 - #define vaddlvq_s16(a) simde_vaddlvq_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vaddlvq_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddlvq_s32(a); - #else - simde_int32x4_private a_ = simde_int32x4_to_private(a); - int64_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(+:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r += a_.values[i]; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddlvq_s32 - #define vaddlvq_s32(a) simde_vaddlvq_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vaddlvq_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddlvq_u8(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i a_ = simde_uint8x16_to_m128i(a); - a_ = _mm_sad_epu8(a_, _mm_setzero_si128()); - a_ = _mm_add_epi16(a_, _mm_shuffle_epi32(a_, 0xEE)); - return HEDLEY_STATIC_CAST(uint16_t, _mm_cvtsi128_si32(a_)); - #else - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - uint16_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(+:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r += a_.values[i]; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddlvq_u8 - #define vaddlvq_u8(a) simde_vaddlvq_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vaddlvq_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddlvq_u16(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i a_ = simde_uint16x8_to_m128i(a); - a_ = _mm_shuffle_epi8(a_, _mm_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0)); - a_ = _mm_sad_epu8(a_, _mm_setzero_si128()); - a_ = _mm_add_epi32(a_, _mm_srli_si128(a_, 7)); - return HEDLEY_STATIC_CAST(uint32_t, _mm_cvtsi128_si32(a_)); - #else - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - uint32_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(+:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r += a_.values[i]; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddlvq_u16 - #define 
vaddlvq_u16(a) simde_vaddlvq_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vaddlvq_u32(simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddlvq_u32(a); - #else - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - uint64_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(+:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r += a_.values[i]; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddlvq_u32 - #define vaddlvq_u32(a) simde_vaddlvq_u32(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ADDLV_H) */ -/* :: End simde/arm/neon/addlv.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/addl_high.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - */ - -#if !defined(SIMDE_ARM_NEON_ADDL_HIGH_H) -#define SIMDE_ARM_NEON_ADDL_HIGH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vaddl_high_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddl_high_s8(a, b); - #else - return simde_vaddq_s16(simde_vmovl_high_s8(a), simde_vmovl_high_s8(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddl_high_s8 - #define vaddl_high_s8(a, b) simde_vaddl_high_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vaddl_high_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddl_high_s16(a, b); - #else - return simde_vaddq_s32(simde_vmovl_high_s16(a), simde_vmovl_high_s16(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddl_high_s16 - #define vaddl_high_s16(a, b) simde_vaddl_high_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vaddl_high_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddl_high_s32(a, b); - #else - return simde_vaddq_s64(simde_vmovl_high_s32(a), simde_vmovl_high_s32(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddl_high_s32 - #define vaddl_high_s32(a, b) simde_vaddl_high_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vaddl_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddl_high_u8(a, b); - #else - return simde_vaddq_u16(simde_vmovl_high_u8(a), simde_vmovl_high_u8(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddl_high_u8 - #define vaddl_high_u8(a, b) simde_vaddl_high_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vaddl_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddl_high_u16(a, b); - #else - return simde_vaddq_u32(simde_vmovl_high_u16(a), simde_vmovl_high_u16(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddl_high_u16 - #define vaddl_high_u16(a, b) simde_vaddl_high_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vaddl_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddl_high_u32(a, b); - #else - return simde_vaddq_u64(simde_vmovl_high_u32(a), simde_vmovl_high_u32(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddl_high_u32 - #define vaddl_high_u32(a, b) simde_vaddl_high_u32((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ADDL_HIGH_H) */ -/* :: End simde/arm/neon/addl_high.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 
589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/addw.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - */ - -#if !defined(SIMDE_ARM_NEON_ADDW_H) -#define SIMDE_ARM_NEON_ADDW_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vaddw_s8(simde_int16x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddw_s8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddq_s16(a, simde_vmovl_s8(b)); - #else - simde_int16x8_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_int8x8_private b_ = simde_int8x8_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values += a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddw_s8 - #define vaddw_s8(a, b) simde_vaddw_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vaddw_s16(simde_int32x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddw_s16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddq_s32(a, simde_vmovl_s16(b)); - #else - simde_int32x4_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - simde_int16x4_private b_ = simde_int16x4_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values += a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddw_s16 - #define vaddw_s16(a, b) simde_vaddw_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vaddw_s32(simde_int64x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddw_s32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddq_s64(a, simde_vmovl_s32(b)); - #else - simde_int64x2_private r_; - simde_int64x2_private a_ = simde_int64x2_to_private(a); - simde_int32x2_private b_ = simde_int32x2_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values += a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddw_s32 - #define vaddw_s32(a, b) simde_vaddw_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vaddw_u8(simde_uint16x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddw_u8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddq_u16(a, simde_vmovl_u8(b)); - #else - simde_uint16x8_private r_; - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - simde_uint8x8_private b_ = simde_uint8x8_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values += a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddw_u8 - #define vaddw_u8(a, b) simde_vaddw_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vaddw_u16(simde_uint32x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddw_u16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddq_u32(a, simde_vmovl_u16(b)); - #else - simde_uint32x4_private r_; - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - simde_uint16x4_private b_ = simde_uint16x4_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values += a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddw_u16 - #define vaddw_u16(a, b) simde_vaddw_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vaddw_u32(simde_uint64x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vaddw_u32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddq_u64(a, simde_vmovl_u32(b)); - #else - simde_uint64x2_private r_; - simde_uint64x2_private a_ = simde_uint64x2_to_private(a); - simde_uint32x2_private b_ = simde_uint32x2_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - 
SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values += a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i]; - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vaddw_u32 - #define vaddw_u32(a, b) simde_vaddw_u32((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ADDW_H) */ -/* :: End simde/arm/neon/addw.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/addw_high.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_NEON_ADDW_HIGH_H) -#define SIMDE_ARM_NEON_ADDW_HIGH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vaddw_high_s8(simde_int16x8_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddw_high_s8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddq_s16(a, simde_vmovl_high_s8(b)); - #else - simde_int16x8_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_int8x16_private b_ = simde_int8x16_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; - } - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddw_high_s8 - #define vaddw_high_s8(a, b) simde_vaddw_high_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vaddw_high_s16(simde_int32x4_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddw_high_s16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddq_s32(a, simde_vmovl_high_s16(b)); - #else - simde_int32x4_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - simde_int16x8_private b_ = simde_int16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; - } - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddw_high_s16 - #define vaddw_high_s16(a, b) simde_vaddw_high_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vaddw_high_s32(simde_int64x2_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddw_high_s32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddq_s64(a, simde_vmovl_high_s32(b)); - #else - simde_int64x2_private r_; - simde_int64x2_private a_ = simde_int64x2_to_private(a); - simde_int32x4_private b_ = simde_int32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; - } - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddw_high_s32 - #define vaddw_high_s32(a, b) simde_vaddw_high_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vaddw_high_u8(simde_uint16x8_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddw_high_u8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddq_u16(a, simde_vmovl_high_u8(b)); - #else - simde_uint16x8_private r_; - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - simde_uint8x16_private b_ = simde_uint8x16_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] 
= a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; - } - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddw_high_u8 - #define vaddw_high_u8(a, b) simde_vaddw_high_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vaddw_high_u16(simde_uint32x4_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddw_high_u16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddq_u32(a, simde_vmovl_high_u16(b)); - #else - simde_uint32x4_private r_; - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - simde_uint16x8_private b_ = simde_uint16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; - } - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddw_high_u16 - #define vaddw_high_u16(a, b) simde_vaddw_high_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vaddw_high_u32(simde_uint64x2_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vaddw_high_u32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vaddq_u64(a, simde_vmovl_high_u32(b)); - #else - simde_uint64x2_private r_; - simde_uint64x2_private a_ = simde_uint64x2_to_private(a); - simde_uint32x4_private b_ = simde_uint32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; - } - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vaddw_high_u32 - #define vaddw_high_u32(a, b) simde_vaddw_high_u32((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ADDW_HIGH_H) */ -/* :: End simde/arm/neon/addw_high.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/aes.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_AES_H) -#define SIMDE_ARM_NEON_AES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-aes.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_AES_H) -#define SIMDE_AES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if !(defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)) - -/* - * Number of columns (32-bit words) comprising the State. For this - * standard, Nb = 4. 
- */ -#define simde_x_aes_Nb 4 - -static uint8_t simde_x_aes_gmult_lookup_table[8][256] = { -{ // gmult(0x02, b); - 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, - 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, - 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, - 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, - 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, - 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, - 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, - 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, - 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, - 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25, - 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, - 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, - 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, - 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, - 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, - 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 -}, -{ // gmult(0x01, b); - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, -}, -{ // gmult(0x01, b); - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 
0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, -}, -{ // gmult(0x03, b); - 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, - 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, - 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, - 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, - 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, - 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, - 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, - 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, - 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a, - 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba, - 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, - 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, - 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, - 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, - 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, - 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a, -}, -{ // gmult(0x0e, b); - 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, - 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, - 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, - 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, - 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, - 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, - 0x76, 0x78, 0x6a, 0x64, 
0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, - 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, - 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, - 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb, - 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, - 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, - 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, - 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, - 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, - 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d, -}, -{ // gmult(0x09, b); - 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, - 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, - 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, - 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, - 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, - 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, - 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, - 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, - 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, - 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b, - 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, - 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, - 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, - 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, - 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, - 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46, - -}, -{ // gmult(0x0d, b); - 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, - 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, - 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, - 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, - 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, - 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, - 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, - 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, - 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, - 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 
0x41, - 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, - 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, - 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, - 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, - 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, - 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97, -}, -{ // gmult(0x0b, b); - 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, - 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, - 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, - 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, - 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, - 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, - 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, - 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, - 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, - 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e, - 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, - 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, - 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, - 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, - 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, - 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3, -} -}; - -/* - * S-box transformation table - */ -static uint8_t simde_x_aes_s_box[256] = { - // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, // 0 - 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, // 1 - 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, // 2 - 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, // 3 - 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, // 4 - 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, // 5 - 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, // 6 - 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, // 7 - 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, // 8 - 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, // 9 - 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, // a - 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, // b - 
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, // c - 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, // d - 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, // e - 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};// f - -/* - * Inverse S-box transformation table - */ -static uint8_t simde_x_aes_inv_s_box[256] = { - // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, // 0 - 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, // 1 - 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, // 2 - 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, // 3 - 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, // 4 - 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, // 5 - 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, // 6 - 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, // 7 - 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, // 8 - 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, // 9 - 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, // a - 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, // b - 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, // c - 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, // d - 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, // e - 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d};// f - -/* - * Multiplication of 4 byte words - * m(x) = x4+1 - -SIMDE_FUNCTION_ATTRIBUTES -void coef_mult(uint8_t *a, uint8_t *b, uint8_t *d) { - - d[0] = gmult(a[0],b[0])^gmult(a[3],b[1])^gmult(a[2],b[2])^gmult(a[1],b[3]); - d[1] = gmult(a[1],b[0])^gmult(a[0],b[1])^gmult(a[3],b[2])^gmult(a[2],b[3]); - d[2] = gmult(a[2],b[0])^gmult(a[1],b[1])^gmult(a[0],b[2])^gmult(a[3],b[3]); - d[3] = gmult(a[3],b[0])^gmult(a[2],b[1])^gmult(a[1],b[2])^gmult(a[0],b[3]); -} -*/ - -SIMDE_FUNCTION_ATTRIBUTES -void simde_x_aes_coef_mult_lookup(int lookup_table_offset, uint8_t *b, uint8_t *d) { - int o = lookup_table_offset; - - #define gmultl(o,b) simde_x_aes_gmult_lookup_table[o][b] - d[0] = gmultl(o+0,b[0])^gmultl(o+3,b[1])^gmultl(o+2,b[2])^gmultl(o+1,b[3]); - d[1] = gmultl(o+1,b[0])^gmultl(o+0,b[1])^gmultl(o+3,b[2])^gmultl(o+2,b[3]); - d[2] = gmultl(o+2,b[0])^gmultl(o+1,b[1])^gmultl(o+0,b[2])^gmultl(o+3,b[3]); - d[3] = gmultl(o+3,b[0])^gmultl(o+2,b[1])^gmultl(o+1,b[2])^gmultl(o+0,b[3]); - #undef gmultl -} - -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_AES_H) */ -/* :: End simde/simde-aes.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -static uint8_t simde_xtime(uint8_t x) -{ - return HEDLEY_STATIC_CAST(uint8_t, (x<<1) ^ (((x>>7) & 1) * 0x1b)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t 
[Deleted vendored SIMDE amalgamation, elided here for brevity. The removed span contains the
portable fallback implementations of ARM NEON intrinsics that are no longer bundled with the
package: the remainder of simde/arm/neon/aes.h (simde_vaeseq_u8, simde_vaesdq_u8,
simde_vaesmcq_u8, a Multiply/xtime helper, and simde_vaesimcq_u8), all of simde/arm/neon/and.h
(simde_vand_*/simde_vandq_* for the 8/16/32/64-bit integer types), the opening of
simde/arm/neon/bcax.h together with all of simde/arm/neon/eor.h
(simde_veor_*/simde_veorq_*/simde_veor3q_*), the opening of simde/arm/neon/bic.h, and the first
part of simde/arm/neon/dup_n.h (simde_vdup_n_*/simde_vdupq_n_* splat helpers for float, integer,
and poly element types). Each inlined header repeats its own MIT license block and
"AUTOMATICALLY GENERATED FILE, DO NOT MODIFY" marker; the whole file is deleted along with the
bundled SIMDE dependency.]
#define vmovq_n_p16(value) simde_vmovq_n_p16((value)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vdupq_n_p64(simde_poly64_t value) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vdupq_n_p64(value); - #else - simde_poly64x2_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = value; - } - - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdupq_n_p64 - #define vdupq_n_p64(value) simde_vdupq_n_p64((value)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x4_t -simde_vdup_n_bf16(simde_bfloat16_t value) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vdup_n_bf16(value); - #else - simde_bfloat16x4_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = value; - } - - return simde_bfloat16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdup_n_bf16 - #define vdup_n_bf16(value) simde_vdup_n_bf16((value)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8_t -simde_vdupq_n_bf16(simde_bfloat16_t value) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vdupq_n_bf16(value); - #else - simde_bfloat16x8_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = value; - } - - return simde_bfloat16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdupq_n_bf16 - #define vdupq_n_bf16(value) simde_vdupq_n_bf16((value)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_DUP_N_H) */ -/* :: End simde/arm/neon/dup_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vbic_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbic_s8(a, b); - #else - simde_int8x8_private - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b), - r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbic_s8 - #define vbic_s8(a, b) simde_vbic_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vbic_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbic_s16(a, b); - #else - simde_int16x4_private - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b), - r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbic_s16 - #define vbic_s16(a, b) simde_vbic_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vbic_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
- return vbic_s32(a, b); - #else - simde_int32x2_private - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b), - r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbic_s32 - #define vbic_s32(a, b) simde_vbic_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vbic_s64(simde_int64x1_t a, simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbic_s64(a, b); - #else - simde_int64x1_private - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b), - r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbic_s64 - #define vbic_s64(a, b) simde_vbic_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vbic_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbic_u8(a, b); - #else - simde_uint8x8_private - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b), - r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbic_u8 - #define vbic_u8(a, b) simde_vbic_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vbic_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbic_u16(a, b); - #else - simde_uint16x4_private - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b), - r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbic_u16 - #define vbic_u16(a, b) simde_vbic_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vbic_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbic_u32(a, b); - #else - simde_uint32x2_private - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b), - r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbic_u32 - #define vbic_u32(a, b) simde_vbic_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vbic_u64(simde_uint64x1_t a, simde_uint64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbic_u64(a, b); - #else - simde_uint64x1_private - a_ 
= simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(b), - r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbic_u64 - #define vbic_u64(a, b) simde_vbic_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vbicq_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbicq_s8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_andc(a, b); - #else - simde_int8x16_private - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbicq_s8 - #define vbicq_s8(a, b) simde_vbicq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vbicq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbicq_s16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_andc(a, b); - #else - simde_int16x8_private - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbicq_s16 - #define vbicq_s16(a, b) simde_vbicq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vbicq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbicq_s32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_andc(a, b); - #else - simde_int32x4_private - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbicq_s32 - #define vbicq_s32(a, b) simde_vbicq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vbicq_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbicq_s64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_andc(a, b); - #else - simde_int64x2_private - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbicq_s64 - #define vbicq_s64(a, b) simde_vbicq_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vbicq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbicq_u8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_andc(a, b); - #else - simde_uint8x16_private - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbicq_u8 - #define vbicq_u8(a, b) simde_vbicq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vbicq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbicq_u16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_andc(a, b); - #else - simde_uint16x8_private - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbicq_u16 - #define vbicq_u16(a, b) simde_vbicq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vbicq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbicq_u32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_andc(a, b); - #else - simde_uint32x4_private - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbicq_u32 - #define vbicq_u32(a, b) simde_vbicq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vbicq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbicq_u64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_andc(a, b); - #else - simde_uint64x2_private - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 
= wasm_v128_andnot(a_.v128, b_.v128); - #else - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] & ~b_.values[i]; - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbicq_u64 - #define vbicq_u64(a, b) simde_vbicq_u64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_BIC_H) */ -/* :: End simde/arm/neon/bic.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vbcaxq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) - return vbcaxq_u8(a, b, c); - #else - return simde_veorq_u8(a, simde_vbicq_u8(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) - #undef vbcaxq_u8 - #define vbcaxq_u8(a, b, c) simde_vbcaxq_u8(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vbcaxq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) - return vbcaxq_u16(a, b, c); - #else - return simde_veorq_u16(a, simde_vbicq_u16(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) - #undef vbcaxq_u16 - #define vbcaxq_u16(a, b, c) simde_vbcaxq_u16(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vbcaxq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) - return vbcaxq_u32(a, b, c); - #else - return simde_veorq_u32(a, simde_vbicq_u32(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) - #undef vbcaxq_u32 - #define vbcaxq_u32(a, b, c) simde_vbcaxq_u32(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vbcaxq_u64(simde_uint64x2_t a, simde_uint64x2_t b, simde_uint64x2_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) - return vbcaxq_u64(a, b, c); - #else - return simde_veorq_u64(a, simde_vbicq_u64(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) - #undef vbcaxq_u64 - #define vbcaxq_u64(a, b, c) simde_vbcaxq_u64(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vbcaxq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) - return vbcaxq_s8(a, b, c); - #else - return simde_veorq_s8(a, simde_vbicq_s8(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) - #undef vbcaxq_s8 - #define vbcaxq_s8(a, b, c) simde_vbcaxq_s8(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vbcaxq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) - return vbcaxq_s16(a, b, c); - #else - return simde_veorq_s16(a,simde_vbicq_s16(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) 
&& !defined(__ARM_FEATURE_SHA3)) - #undef vbcaxq_s16 - #define vbcaxq_s16(a, b, c) simde_vbcaxq_s16(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vbcaxq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) - return vbcaxq_s32(a, b, c); - #else - return simde_veorq_s32(a, simde_vbicq_s32(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) - #undef vbcaxq_s32 - #define vbcaxq_s32(a, b, c) simde_vbcaxq_s32(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vbcaxq_s64(simde_int64x2_t a, simde_int64x2_t b, simde_int64x2_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) - return vbcaxq_s64(a, b, c); - #else - return simde_veorq_s64(a, simde_vbicq_s64(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) - #undef vbcaxq_s64 - #define vbcaxq_s64(a, b, c) simde_vbcaxq_s64(a, b, c) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_BCAX_H) */ -/* :: End simde/arm/neon/bcax.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/bsl.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_BSL_H) -#define SIMDE_ARM_NEON_BSL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vbsl_f16(simde_uint16x4_t a, simde_float16x4_t b, simde_float16x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vbsl_f16(a, b, c); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(simde_vreinterpret_u16_f16(b)), - c_ = simde_uint16x4_to_private(simde_vreinterpret_u16_f16(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_vreinterpret_f16_u16(simde_uint16x4_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbsl_f16 - #define vbsl_f16(a, b, c) simde_vbsl_f16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vbsl_f32(simde_uint32x2_t a, simde_float32x2_t b, simde_float32x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbsl_f32(a, b, c); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(simde_vreinterpret_u32_f32(b)), - c_ = simde_uint32x2_to_private(simde_vreinterpret_u32_f32(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_vreinterpret_f32_u32(simde_uint32x2_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbsl_f32 - #define vbsl_f32(a, b, c) simde_vbsl_f32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vbsl_f64(simde_uint64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vbsl_f64(a, b, c); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(simde_vreinterpret_u64_f64(b)), - c_ = simde_uint64x1_to_private(simde_vreinterpret_u64_f64(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_vreinterpret_f64_u64(simde_uint64x1_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vbsl_f64 - #define vbsl_f64(a, b, c) simde_vbsl_f64((a), (b), (c)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vbsl_s8(simde_uint8x8_t a, simde_int8x8_t b, simde_int8x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbsl_s8(a, b, c); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(simde_vreinterpret_u8_s8(b)), - c_ = simde_uint8x8_to_private(simde_vreinterpret_u8_s8(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_vreinterpret_s8_u8(simde_uint8x8_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbsl_s8 - #define vbsl_s8(a, b, c) simde_vbsl_s8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vbsl_s16(simde_uint16x4_t a, simde_int16x4_t b, simde_int16x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbsl_s16(a, b, c); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(simde_vreinterpret_u16_s16(b)), - c_ = simde_uint16x4_to_private(simde_vreinterpret_u16_s16(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_vreinterpret_s16_u16(simde_uint16x4_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbsl_s16 - #define vbsl_s16(a, b, c) simde_vbsl_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vbsl_s32(simde_uint32x2_t a, simde_int32x2_t b, simde_int32x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbsl_s32(a, b, c); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(simde_vreinterpret_u32_s32(b)), - c_ = simde_uint32x2_to_private(simde_vreinterpret_u32_s32(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_vreinterpret_s32_u32(simde_uint32x2_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbsl_s32 - #define vbsl_s32(a, b, c) simde_vbsl_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vbsl_s64(simde_uint64x1_t a, simde_int64x1_t b, simde_int64x1_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbsl_s64(a, b, c); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(simde_vreinterpret_u64_s64(b)), - c_ = simde_uint64x1_to_private(simde_vreinterpret_u64_s64(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_vreinterpret_s64_u64(simde_uint64x1_from_private(r_)); - 
#endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbsl_s64 - #define vbsl_s64(a, b, c) simde_vbsl_s64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vbsl_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbsl_u8(a, b, c); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b), - c_ = simde_uint8x8_to_private(c); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbsl_u8 - #define vbsl_u8(a, b, c) simde_vbsl_u8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vbsl_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbsl_u16(a, b, c); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b), - c_ = simde_uint16x4_to_private(c); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbsl_u16 - #define vbsl_u16(a, b, c) simde_vbsl_u16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vbsl_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbsl_u32(a, b, c); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b), - c_ = simde_uint32x2_to_private(c); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbsl_u32 - #define vbsl_u32(a, b, c) simde_vbsl_u32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vbsl_u64(simde_uint64x1_t a, simde_uint64x1_t b, simde_uint64x1_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbsl_u64(a, b, c); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(b), - c_ = simde_uint64x1_to_private(c); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbsl_u64 - #define vbsl_u64(a, b, c) simde_vbsl_u64((a), (b), (c)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vbslq_f16(simde_uint16x8_t a, simde_float16x8_t b, simde_float16x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vbslq_f16(a, b, c); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_f16(b)), - c_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_f16(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_vreinterpretq_f16_u16(simde_uint16x8_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbslq_f16 - #define vbslq_f16(a, b, c) simde_vbslq_f16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vbslq_f32(simde_uint32x4_t a, simde_float32x4_t b, simde_float32x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbslq_f32(a, b, c); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - return vec_sel(c, b, a); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_f32(b)), - c_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_f32(c)); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_vreinterpretq_f32_u32(simde_uint32x4_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbslq_f32 - #define vbslq_f32(a, b, c) simde_vbslq_f32((a), (b), (c)) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vbslq_f64(simde_uint64x2_t a, simde_float64x2_t b, simde_float64x2_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vbslq_f64(a, b, c); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_sel(c, b, a); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_f64(b)), - c_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_f64(c)); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi64(a_.m128i, b_.m128i, c_.m128i, 0xca); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_vreinterpretq_f64_u64(simde_uint64x2_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vbslq_f64 - #define vbslq_f64(a, b, c) simde_vbslq_f64((a), (b), (c)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vbslq_s8(simde_uint8x16_t a, simde_int8x16_t b, simde_int8x16_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbslq_s8(a, b, c); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_sel(c, b, a); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(simde_vreinterpretq_u8_s8(b)), - c_ = simde_uint8x16_to_private(simde_vreinterpretq_u8_s8(c)); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_vreinterpretq_s8_u8(simde_uint8x16_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbslq_s8 - #define vbslq_s8(a, b, c) simde_vbslq_s8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vbslq_s16(simde_uint16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbslq_s16(a, b, c); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_sel(c, b, a); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_s16(b)), - c_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_s16(c)); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_vreinterpretq_s16_u16(simde_uint16x8_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbslq_s16 - #define vbslq_s16(a, b, c) simde_vbslq_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vbslq_s32(simde_uint32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbslq_s32(a, b, c); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_sel(c, b, a); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_s32(b)), - c_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_s32(c)); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & 
~a_.values[i]); - } - #endif - - return simde_vreinterpretq_s32_u32(simde_uint32x4_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbslq_s32 - #define vbslq_s32(a, b, c) simde_vbslq_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vbslq_s64(simde_uint64x2_t a, simde_int64x2_t b, simde_int64x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbslq_s64(a, b, c); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return - simde_vreinterpretq_s64_s32( - simde_vbslq_s32( - simde_vreinterpretq_u32_u64(a), - simde_vreinterpretq_s32_s64(b), - simde_vreinterpretq_s32_s64(c) - ) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_sel( - HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), c), - HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), b), - HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), a)); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_s64(b)), - c_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_s64(c)); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_vreinterpretq_s64_u64(simde_uint64x2_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbslq_s64 - #define vbslq_s64(a, b, c) simde_vbslq_s64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vbslq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbslq_u8(a, b, c); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_sel(c, b, a); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b), - c_ = simde_uint8x16_to_private(c); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbslq_u8 - #define vbslq_u8(a, b, c) simde_vbslq_u8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vbslq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbslq_u16(a, b, c); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_sel(c, b, a); - #else - simde_uint16x8_private - r_, - a_ 
= simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b), - c_ = simde_uint16x8_to_private(c); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbslq_u16 - #define vbslq_u16(a, b, c) simde_vbslq_u16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vbslq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbslq_u32(a, b, c); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_sel(c, b, a); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b), - c_ = simde_uint32x4_to_private(c); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbslq_u32 - #define vbslq_u32(a, b, c) simde_vbslq_u32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vbslq_u64(simde_uint64x2_t a, simde_uint64x2_t b, simde_uint64x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbslq_u64(a, b, c); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return - simde_vreinterpretq_u64_u32( - simde_vbslq_u32( - simde_vreinterpretq_u32_u64(a), - simde_vreinterpretq_u32_u64(b), - simde_vreinterpretq_u32_u64(c) - ) - ); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b), - c_ = simde_uint64x2_to_private(c); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbslq_u64 - #define vbslq_u64(a, b, c) simde_vbslq_u64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vbsl_p8(simde_uint8x8_t a, simde_poly8x8_t b, simde_poly8x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbsl_p8(a, b, c); - #else - 
simde_poly8x8_private - r_, - b_ = simde_poly8x8_to_private(b), - c_ = simde_poly8x8_to_private(c); - simde_uint8x8_private a_ = simde_uint8x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbsl_p8 - #define vbsl_p8(a, b, c) simde_vbsl_p8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vbsl_p16(simde_uint16x4_t a, simde_poly16x4_t b, simde_poly16x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbsl_p16(a, b, c); - #else - simde_poly16x4_private - r_, - b_ = simde_poly16x4_to_private(b), - c_ = simde_poly16x4_to_private(c); - simde_uint16x4_private a_ = simde_uint16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbsl_p16 - #define vbsl_p16(a, b, c) simde_vbsl_p16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t -simde_vbsl_p64(simde_uint64x1_t a, simde_poly64x1_t b, simde_poly64x1_t c) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vbsl_p64(a, b, c); - #else - simde_poly64x1_private - r_, - b_ = simde_poly64x1_to_private(b), - c_ = simde_poly64x1_to_private(c); - simde_uint64x1_private a_ = simde_uint64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - - return simde_poly64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbsl_p64 - #define vbsl_p64(a, b, c) simde_vbsl_p64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vbslq_p8(simde_uint8x16_t a, simde_poly8x16_t b, simde_poly8x16_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbslq_p8(a, b, c); - #else - simde_poly8x16_private - r_, - b_ = simde_poly8x16_to_private(b), - c_ = simde_poly8x16_to_private(c); - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbslq_p8 - #define vbslq_p8(a, b, c) simde_vbslq_p8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vbslq_p16(simde_uint16x8_t a, simde_poly16x8_t b, simde_poly16x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vbslq_p16(a, b, c); - #else - simde_poly16x8_private - r_, - b_ = simde_poly16x8_to_private(b), - c_ = simde_poly16x8_to_private(c); - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vbslq_p16 - #define vbslq_p16(a, b, c) simde_vbslq_p16((a), (b), (c)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vbslq_p64(simde_uint64x2_t a, simde_poly64x2_t b, simde_poly64x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vbslq_p64(a, b, c); - #else - simde_poly64x2_private - r_, - b_ = simde_poly64x2_to_private(b), - c_ = simde_poly64x2_to_private(c); - simde_uint64x2_private a_ = simde_uint64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); - } - - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbslq_p64 - #define vbslq_p64(a, b, c) simde_vbslq_p64((a), (b), (c)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_BSL_H) */ -/* :: End simde/arm/neon/bsl.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cadd_rot270.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Chi-Wei Chu - */ - -#if !defined(SIMDE_ARM_NEON_CADD_ROT270_H) -#define SIMDE_ARM_NEON_CADD_ROT270_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(__clang__) && SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 -SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -_Pragma("clang diagnostic ignored \"-Wimplicit-float-conversion\"") -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcadd_rot270_f16(simde_float16x4_t a, simde_float16x4_t b) -{ - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcadd_rot270_f16(a, b); - #else - simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); - r_.values[2 * i + 1] = - simde_vaddh_f16(simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), a_.values[2 * i + 1]); - } - #endif - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcadd_rot270_f16 - #define vcadd_rot270_f16(a, b) simde_vcadd_rot270_f16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcaddq_rot270_f16(simde_float16x8_t a, simde_float16x8_t b) -{ - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcaddq_rot270_f16(a, b); - #else - simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); - r_.values[2 * i + 1] = - simde_vaddh_f16(simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), a_.values[2 * i + 1]); - } - #endif - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcaddq_rot270_f16 - #define vcaddq_rot270_f16(a, b) simde_vcaddq_rot270_f16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcadd_rot270_f32(simde_float32x2_t a, simde_float32x2_t b) -{ - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || 
SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcadd_rot270_f32(a, b); - #else - simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; - r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; - } - #endif - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcadd_rot270_f32 - #define vcadd_rot270_f32(a, b) simde_vcadd_rot270_f32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcaddq_rot270_f32(simde_float32x4_t a, simde_float32x4_t b) -{ - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcaddq_rot270_f32(a, b); - #else - simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; - r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; - } - #endif - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcaddq_rot270_f32 - #define vcaddq_rot270_f32(a, b) simde_vcaddq_rot270_f32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t simde_vcaddq_rot270_f64(simde_float64x2_t a, simde_float64x2_t b) -{ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcaddq_rot270_f64(a, b); - #else - simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 3, 0); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; - r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; - } - #endif - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcaddq_rot270_f64 - #define vcaddq_rot270_f64(a, b) simde_vcaddq_rot270_f64(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT270_H) */ -/* :: End simde/arm/neon/cadd_rot270.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cadd_rot90.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * 
restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Chi-Wei Chu - */ - -#if !defined(SIMDE_ARM_NEON_CADD_ROT90_H) -#define SIMDE_ARM_NEON_CADD_ROT90_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(__clang__) && SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 -SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -_Pragma("clang diagnostic ignored \"-Wimplicit-float-conversion\"") -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcadd_rot90_f16(simde_float16x4_t a, simde_float16x4_t b) -{ - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcadd_rot90_f16(a, b); - #else - simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] = - simde_vaddh_f16(simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i + 1])), a_.values[2 * i]); - r_.values[2 * i + 1] = simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); - } - #endif - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcadd_rot90_f16 - #define vcadd_rot90_f16(a, b) simde_vcadd_rot90_f16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcaddq_rot90_f16(simde_float16x8_t a, simde_float16x8_t b) -{ - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcaddq_rot90_f16(a, b); - #else - simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); - r_.values = b_.values + a_.values; - #else - 
SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] = - simde_vaddh_f16(simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i + 1])), a_.values[2 * i]); - r_.values[2 * i + 1] = simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); - } - #endif - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcaddq_rot90_f16 - #define vcaddq_rot90_f16(a, b) simde_vcaddq_rot90_f16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcadd_rot90_f32(simde_float32x2_t a, simde_float32x2_t b) -{ - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcadd_rot90_f32(a, b); - #else - simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; - r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; - } - #endif - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcadd_rot90_f32 - #define vcadd_rot90_f32(a, b) simde_vcadd_rot90_f32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcaddq_rot90_f32(simde_float32x4_t a, simde_float32x4_t b) -{ - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcaddq_rot90_f32(a, b); - #else - simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; - r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; - } - #endif - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcaddq_rot90_f32 - #define vcaddq_rot90_f32(a, b) simde_vcaddq_rot90_f32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t simde_vcaddq_rot90_f64(simde_float64x2_t a, simde_float64x2_t b) -{ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcaddq_rot90_f64(a, b); - #else - simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 1, 2); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 
* i]; - r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; - } - #endif - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcaddq_rot90_f64 -#define vcaddq_rot90_f64(a, b) simde_vcaddq_rot90_f64(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT90_H) */ -/* :: End simde/arm/neon/cadd_rot90.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cage.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - * 2021 Atharva Nimbalkar - */ - -#if !defined(SIMDE_ARM_NEON_CAGE_H) -#define SIMDE_ARM_NEON_CAGE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cge.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_ARM_NEON_CGE_H) -#define SIMDE_ARM_NEON_CGE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vcgeh_f16(simde_float16_t a, simde_float16_t b){ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return HEDLEY_STATIC_CAST(uint16_t, vcgeh_f16(a, b)); - #else - return (simde_float16_to_float32(a) >= simde_float16_to_float32(b)) ? UINT16_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgeh_f16 - #define vcgeh_f16(a, b) simde_vcgeh_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vcgeq_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcgeq_f16(a, b); - #else - simde_float16x8_private - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - simde_uint16x8_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgeh_f16(a_.values[i], b_.values[i]); - } - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcgeq_f16 - #define vcgeq_f16(a, b) simde_vcgeq_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcgeq_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcgeq_f32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b)); - #else - simde_float32x4_private - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - simde_uint32x4_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_castps_si128(_mm_cmpge_ps(a_.m128, b_.m128)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_f32x4_ge(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT32_MAX : 0; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcgeq_f32 - #define vcgeq_f32(a, b) simde_vcgeq_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcgeq_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgeq_f64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b)); - #else - simde_float64x2_private - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - simde_uint64x2_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_castpd_si128(_mm_cmpge_pd(a_.m128d, b_.m128d)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_f64x2_ge(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0; - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgeq_f64 - #define vcgeq_f64(a, b) simde_vcgeq_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vcgeq_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcgeq_s8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpge(a, b)); - #else - simde_int8x16_private - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - simde_uint8x16_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_or_si128(_mm_cmpgt_epi8(a_.m128i, b_.m128i), _mm_cmpeq_epi8(a_.m128i, b_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_ge(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcgeq_s8 - #define vcgeq_s8(a, b) simde_vcgeq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vcgeq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcgeq_s16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpge(a, b)); - #else - simde_int16x8_private - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - simde_uint16x8_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_or_si128(_mm_cmpgt_epi16(a_.m128i, b_.m128i), _mm_cmpeq_epi16(a_.m128i, b_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_ge(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT16_MAX : 0; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcgeq_s16 - #define vcgeq_s16(a, b) simde_vcgeq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcgeq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcgeq_s32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b)); - #else - simde_int32x4_private - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - simde_uint32x4_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(a_.m128i, b_.m128i), _mm_cmpeq_epi32(a_.m128i, b_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_ge(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcgeq_s32 - #define vcgeq_s32(a, b) simde_vcgeq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcgeq_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgeq_s64(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vreinterpretq_u64_s32(vmvnq_s32(vreinterpretq_s32_s64(vshrq_n_s64(vqsubq_s64(a, b), 63)))); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b)); - #else - simde_int64x2_private - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - simde_uint64x2_private r_; - - #if defined(SIMDE_X86_SSE4_2_NATIVE) - r_.m128i = _mm_or_si128(_mm_cmpgt_epi64(a_.m128i, b_.m128i), _mm_cmpeq_epi64(a_.m128i, b_.m128i)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT64_MAX : 0; - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgeq_s64 - #define vcgeq_s64(a, b) simde_vcgeq_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vcgeq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcgeq_u8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpge(a, b)); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = - _mm_cmpeq_epi8( - _mm_min_epu8(b_.m128i, a_.m128i), - b_.m128i - ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u8x16_ge(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcgeq_u8 - #define vcgeq_u8(a, b) simde_vcgeq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vcgeq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcgeq_u16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpge(a, b)); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = - _mm_cmpeq_epi16( - _mm_min_epu16(b_.m128i, a_.m128i), - b_.m128i - ); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i sign_bits = _mm_set1_epi16(INT16_MIN); - r_.m128i = _mm_or_si128(_mm_cmpgt_epi16(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi16(a_.m128i, b_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u16x8_ge(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT16_MAX : 0; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcgeq_u16 - #define vcgeq_u16(a, b) simde_vcgeq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcgeq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcgeq_u32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b)); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = - _mm_cmpeq_epi32( - _mm_min_epu32(b_.m128i, a_.m128i), - b_.m128i - ); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i sign_bits = _mm_set1_epi32(INT32_MIN); - r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi32(a_.m128i, b_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u32x4_ge(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcgeq_u32 - #define vcgeq_u32(a, b) simde_vcgeq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcgeq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgeq_u64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b)); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b); - - #if defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = - _mm_cmpeq_epi64( - _mm_min_epu64(b_.m128i, a_.m128i), - b_.m128i - ); - #elif defined(SIMDE_X86_SSE4_2_NATIVE) - __m128i sign_bits = _mm_set1_epi64x(INT64_MIN); - r_.m128i = _mm_or_si128(_mm_cmpgt_epi64(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi64(a_.m128i, b_.m128i)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT64_MAX : 0; - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgeq_u64 - #define vcgeq_u64(a, b) simde_vcgeq_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vcge_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcge_f16(a, b); - #else - simde_float16x4_private - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - simde_uint16x4_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgeh_f16(a_.values[i], b_.values[i]); - } - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcge_f16 - #define vcge_f16(a, b) simde_vcge_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vcge_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcge_f32(a, b); - #else - simde_float32x2_private - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - simde_uint32x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcge_f32 - #define vcge_f32(a, b) simde_vcge_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vcge_f64(simde_float64x1_t a, simde_float64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcge_f64(a, b); - #else - simde_float64x1_private - a_ = simde_float64x1_to_private(a), - b_ = simde_float64x1_to_private(b); - simde_uint64x1_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0; - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcge_f64 - #define vcge_f64(a, b) simde_vcge_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vcge_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcge_s8(a, b); - #else - simde_int8x8_private - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - simde_uint8x8_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_or_si64(_mm_cmpgt_pi8(a_.m64, b_.m64), _mm_cmpeq_pi8(a_.m64, b_.m64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT8_MAX : 0; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcge_s8 - #define vcge_s8(a, b) simde_vcge_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vcge_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcge_s16(a, b); - #else - simde_int16x4_private - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - simde_uint16x4_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(a_.m64, b_.m64), _mm_cmpeq_pi16(a_.m64, b_.m64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcge_s16 - #define vcge_s16(a, b) simde_vcge_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vcge_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcge_s32(a, b); - #else - simde_int32x2_private - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - simde_uint32x2_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(a_.m64, b_.m64), _mm_cmpeq_pi32(a_.m64, b_.m64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcge_s32 - #define vcge_s32(a, b) simde_vcge_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vcge_s64(simde_int64x1_t a, simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcge_s64(a, b); - #else - simde_int64x1_private - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b); - simde_uint64x1_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT64_MAX : 0; - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcge_s64 - #define vcge_s64(a, b) simde_vcge_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vcge_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcge_u8(a, b); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - __m64 sign_bits = _mm_set1_pi8(INT8_MIN); - r_.m64 = _mm_or_si64(_mm_cmpgt_pi8(_mm_xor_si64(a_.m64, sign_bits), _mm_xor_si64(b_.m64, sign_bits)), _mm_cmpeq_pi8(a_.m64, b_.m64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcge_u8 - #define vcge_u8(a, b) simde_vcge_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vcge_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcge_u16(a, b); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - __m64 sign_bits = _mm_set1_pi16(INT16_MIN); - r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(_mm_xor_si64(a_.m64, sign_bits), _mm_xor_si64(b_.m64, sign_bits)), _mm_cmpeq_pi16(a_.m64, b_.m64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcge_u16 - #define vcge_u16(a, b) simde_vcge_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vcge_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcge_u32(a, b); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - __m64 sign_bits = _mm_set1_pi32(INT32_MIN); - r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(_mm_xor_si64(a_.m64, sign_bits), _mm_xor_si64(b_.m64, sign_bits)), _mm_cmpeq_pi32(a_.m64, b_.m64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT32_MAX : 0; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcge_u32 - #define vcge_u32(a, b) simde_vcge_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vcge_u64(simde_uint64x1_t a, simde_uint64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcge_u64(a, b); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0; - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcge_u64 - #define vcge_u64(a, b) simde_vcge_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcged_f64(simde_float64_t a, simde_float64_t b){ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint64_t, vcged_f64(a, b)); - #else - return (a >= b) ? UINT64_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcged_f64 - #define vcged_f64(a, b) simde_vcged_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcged_s64(int64_t a, int64_t b){ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint64_t, vcged_s64(a, b)); - #else - return (a >= b) ? UINT64_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcged_s64 - #define vcged_s64(a, b) simde_vcged_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcged_u64(uint64_t a, uint64_t b){ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint64_t, vcged_u64(a, b)); - #else - return (a >= b) ? UINT64_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcged_u64 - #define vcged_u64(a, b) simde_vcged_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vcges_f32(simde_float32_t a, simde_float32_t b){ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint32_t, vcges_f32(a, b)); - #else - return (a >= b) ? UINT32_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcges_f32 - #define vcges_f32(a, b) simde_vcges_f32((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CGE_H) */ -/* :: End simde/arm/neon/cge.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vcageh_f16(simde_float16_t a, simde_float16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcageh_f16(a, b); - #else - simde_float32_t a_ = simde_float16_to_float32(a); - simde_float32_t b_ = simde_float16_to_float32(b); - return (simde_math_fabsf(a_) >= simde_math_fabsf(b_)) ? UINT16_MAX : UINT16_C(0); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcageh_f16 - #define vcageh_f16(a, b) simde_vcageh_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vcages_f32(simde_float32_t a, simde_float32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcages_f32(a, b); - #else - return (simde_math_fabsf(a) >= simde_math_fabsf(b)) ? 
~UINT32_C(0) : UINT32_C(0); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcages_f32 - #define vcages_f32(a, b) simde_vcages_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcaged_f64(simde_float64_t a, simde_float64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcaged_f64(a, b); - #else - return (simde_math_fabs(a) >= simde_math_fabs(b)) ? ~UINT64_C(0) : UINT64_C(0); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcaged_f64 - #define vcaged_f64(a, b) simde_vcaged_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vcage_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcage_f16(a, b); - #else - simde_float16x4_private - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - simde_uint16x4_private r_; - - SIMDE_VECTORIZE - for(size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcageh_f16(a_.values[i], b_.values[i]); - } - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcage_f16 - #define vcage_f16(a, b) simde_vcage_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vcage_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcage_f32(a, b); - #else - return simde_vcge_f32(simde_vabs_f32(a), simde_vabs_f32(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcage_f32 - #define vcage_f32(a, b) simde_vcage_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vcage_f64(simde_float64x1_t a, simde_float64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcage_f64(a, b); - #else - return simde_vcge_f64(simde_vabs_f64(a), simde_vabs_f64(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcage_f64 - #define vcage_f64(a, b) simde_vcage_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vcageq_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcageq_f16(a, b); - #else - simde_float16x8_private - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - simde_uint16x8_private r_; - - SIMDE_VECTORIZE - for(size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcageh_f16(a_.values[i], b_.values[i]); - } - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcageq_f16 - #define vcageq_f16(a, b) simde_vcageq_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcageq_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcageq_f32(a, b); - #else - return simde_vcgeq_f32(simde_vabsq_f32(a), simde_vabsq_f32(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcageq_f32 - #define vcageq_f32(a, b) simde_vcageq_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcageq_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcageq_f64(a, b); - #else - return simde_vcgeq_f64(simde_vabsq_f64(a), simde_vabsq_f64(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcageq_f64 - #define vcageq_f64(a, b) 
simde_vcageq_f64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CAGE_H) */ -/* :: End simde/arm/neon/cage.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cagt.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - */ - -#if !defined(SIMDE_ARM_NEON_CAGT_H) -#define SIMDE_ARM_NEON_CAGT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cgt.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_CGT_H) -#define SIMDE_ARM_NEON_CGT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/get_low.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_GET_LOW_H) -#define SIMDE_ARM_NEON_GET_LOW_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vget_low_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vget_low_f16(a); - #else - simde_float16x4_private r_; - simde_float16x8_private a_ = simde_float16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i]; - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_low_f16 - #define vget_low_f16(a) simde_vget_low_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vget_low_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_low_f32(a); - #else - simde_float32x2_private r_; - simde_float32x4_private a_ = simde_float32x4_to_private(a); - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i]; - } - #endif - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_low_f32 - #define vget_low_f32(a) simde_vget_low_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vget_low_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vget_low_f64(a); - #else - simde_float64x1_private r_; - 
simde_float64x2_private a_ = simde_float64x2_to_private(a); - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i]; - } - #endif - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vget_low_f64 - #define vget_low_f64(a) simde_vget_low_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vget_low_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_low_s8(a); - #else - simde_int8x8_private r_; - simde_int8x16_private a_ = simde_int8x16_to_private(a); - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_movepi64_pi64(a_.m128i); - #else - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3, 4, 5, 6, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i]; - } - #endif - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_low_s8 - #define vget_low_s8(a) simde_vget_low_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vget_low_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_low_s16(a); - #else - simde_int16x4_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_movepi64_pi64(a_.m128i); - #else - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i]; - } - #endif - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_low_s16 - #define vget_low_s16(a) simde_vget_low_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vget_low_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_low_s32(a); - #else - simde_int32x2_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_movepi64_pi64(a_.m128i); - #else - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i]; - } - #endif - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_low_s32 - #define vget_low_s32(a) simde_vget_low_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vget_low_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vget_low_s64(a); - #else - simde_int64x1_private r_; - simde_int64x2_private a_ = simde_int64x2_to_private(a); - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_movepi64_pi64(a_.m128i); - #else - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.values = __builtin_shufflevector(a_.values, a_.values, 0); - #else - SIMDE_VECTORIZE - 
[… deletion of the vendored, auto-generated SIMDE compatibility header continues here. The removed amalgamated header spans simde/arm/neon/get_low.h, cgt.h (compare greater-than), cagt.h (absolute compare greater-than), cale.h (absolute compare less-than-or-equal), calt.h (absolute compare less-than), and ceq.h (compare equal), each consisting of MIT license text, per-type NEON intrinsic fallbacks, and "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY" markers; the removal of this third-party code continues below …]
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqq_u64 - #define vceqq_u64(a, b) simde_vceqq_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vceq_p8(simde_poly8x8_t a, simde_poly8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vceq_p8(a, b); - #else - simde_uint8x8_private r_; - simde_poly8x8_private - a_ = simde_poly8x8_to_private(a), - b_ = simde_poly8x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] == b_.values[i]) ? HEDLEY_STATIC_CAST(uint8_t, ~UINT8_C(0)) : HEDLEY_STATIC_CAST(uint8_t, UINT8_C(0)); - } - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceq_p8 - #define vceq_p8(a, b) simde_vceq_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vceqq_p8(simde_poly8x16_t a, simde_poly8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vceqq_p8(a, b); - #else - simde_uint8x16_private r_; - simde_poly8x16_private - a_ = simde_poly8x16_to_private(a), - b_ = simde_poly8x16_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] == b_.values[i]) ? HEDLEY_STATIC_CAST(uint8_t, ~UINT8_C(0)) : HEDLEY_STATIC_CAST(uint8_t, UINT8_C(0)); - } - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqq_p8 - #define vceqq_p8(a, b) simde_vceqq_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vceq_p64(simde_poly64x1_t a, simde_poly64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vceq_p64(a, b); - #else - simde_uint64x1_private r_; - simde_poly64x1_private - a_ = simde_poly64x1_to_private(a), - b_ = simde_poly64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vceq_p64 - #define vceq_p64(a, b) simde_vceq_p64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vceqq_p64(simde_poly64x2_t a, simde_poly64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vceqq_p64(a, b); - #else - simde_uint64x2_private r_; - simde_poly64x2_private - a_ = simde_poly64x2_to_private(a), - b_ = simde_poly64x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vceqq_p64 - #define vceqq_p64(a, b) simde_vceqq_p64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CEQ_H) */ -/* :: End simde/arm/neon/ceq.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ceqz.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_CEQZ_H) -#define SIMDE_ARM_NEON_CEQZ_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vceqz_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vceqz_f16(a); - #else - return simde_vceq_f16(a, simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vceqz_f16 - #define vceqz_f16(a) simde_vceqz_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vceqz_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqz_f32(a); - #else - return simde_vceq_f32(a, simde_vdup_n_f32(0.0f)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqz_f32 - #define vceqz_f32(a) simde_vceqz_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vceqz_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqz_f64(a); - #else - return simde_vceq_f64(a, simde_vdup_n_f64(0.0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vceqz_f64 - #define vceqz_f64(a) simde_vceqz_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vceqz_s8(simde_int8x8_t a) { - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqz_s8(a); - #else - return simde_vceq_s8(a, simde_vdup_n_s8(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqz_s8 - #define vceqz_s8(a) simde_vceqz_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vceqz_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqz_s16(a); - #else - return simde_vceq_s16(a, simde_vdup_n_s16(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqz_s16 - #define vceqz_s16(a) simde_vceqz_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vceqz_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqz_s32(a); - #else - return simde_vceq_s32(a, simde_vdup_n_s32(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqz_s32 - #define vceqz_s32(a) simde_vceqz_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vceqz_s64(simde_int64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqz_s64(a); - #else - return simde_vceq_s64(a, simde_vdup_n_s64(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqz_s64 - #define vceqz_s64(a) simde_vceqz_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vceqz_u8(simde_uint8x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqz_u8(a); - #else - return simde_vceq_u8(a, simde_vdup_n_u8(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqz_u8 - #define vceqz_u8(a) simde_vceqz_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vceqz_u16(simde_uint16x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqz_u16(a); - #else - return simde_vceq_u16(a, simde_vdup_n_u16(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqz_u16 - #define vceqz_u16(a) simde_vceqz_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vceqz_u32(simde_uint32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqz_u32(a); - #else - return simde_vceq_u32(a, simde_vdup_n_u32(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqz_u32 - #define vceqz_u32(a) simde_vceqz_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vceqz_u64(simde_uint64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqz_u64(a); - #else - return simde_vceq_u64(a, simde_vdup_n_u64(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqz_u64 - #define vceqz_u64(a) simde_vceqz_u64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vceqzq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vceqzq_f16(a); - #else - return simde_vceqq_f16(a, simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vceqzq_f16 - #define vceqzq_f16(a) simde_vceqzq_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vceqzq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzq_f32(a); - #else - return simde_vceqq_f32(a, simde_vdupq_n_f32(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqzq_f32 - #define vceqzq_f32(a) simde_vceqzq_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vceqzq_f64(simde_float64x2_t a) { - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzq_f64(a); - #else - return simde_vceqq_f64(a, simde_vdupq_n_f64(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vceqzq_f64 - #define vceqzq_f64(a) simde_vceqzq_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vceqzq_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzq_s8(a); - #else - return simde_vceqq_s8(a, simde_vdupq_n_s8(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqzq_s8 - #define vceqzq_s8(a) simde_vceqzq_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vceqzq_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzq_s16(a); - #else - return simde_vceqq_s16(a, simde_vdupq_n_s16(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqzq_s16 - #define vceqzq_s16(a) simde_vceqzq_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vceqzq_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzq_s32(a); - #else - return simde_vceqq_s32(a, simde_vdupq_n_s32(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqzq_s32 - #define vceqzq_s32(a) simde_vceqzq_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vceqzq_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzq_s64(a); - #else - return simde_vceqq_s64(a, simde_vdupq_n_s64(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqzq_s64 - #define vceqzq_s64(a) simde_vceqzq_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vceqzq_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzq_u8(a); - #else - return simde_vceqq_u8(a, simde_vdupq_n_u8(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqzq_u8 - #define vceqzq_u8(a) simde_vceqzq_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vceqzq_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzq_u16(a); - #else - return simde_vceqq_u16(a, simde_vdupq_n_u16(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqzq_u16 - #define vceqzq_u16(a) simde_vceqzq_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vceqzq_u32(simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzq_u32(a); - #else - return simde_vceqq_u32(a, simde_vdupq_n_u32(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqzq_u32 - #define vceqzq_u32(a) simde_vceqzq_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vceqzq_u64(simde_uint64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzq_u64(a); - #else - return simde_vceqq_u64(a, simde_vdupq_n_u64(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqzq_u64 - #define vceqzq_u64(a) simde_vceqzq_u64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vceqzd_s64(int64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint64_t, vceqzd_s64(a)); - #else - return simde_vceqd_s64(a, INT64_C(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqzd_s64 - #define vceqzd_s64(a) simde_vceqzd_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vceqzd_u64(uint64_t a) { - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzd_u64(a); - #else - return simde_vceqd_u64(a, UINT64_C(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqzd_u64 - #define vceqzd_u64(a) simde_vceqzd_u64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vceqzh_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vceqzh_f16(a); - #else - return simde_vceqh_f16(a, SIMDE_FLOAT16_VALUE(0.0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vceqzh_f16 - #define vceqzh_f16(a) simde_vceqzh_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vceqzs_f32(simde_float32_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzs_f32(a); - #else - return simde_vceqs_f32(a, SIMDE_FLOAT32_C(0.0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqzs_f32 - #define vceqzs_f32(a) simde_vceqzs_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vceqzd_f64(simde_float64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzd_f64(a); - #else - return simde_vceqd_f64(a, SIMDE_FLOAT64_C(0.0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vceqzd_f64 - #define vceqzd_f64(a) simde_vceqzd_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vceqz_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqz_p8(a); - #else - return simde_vceq_p8(a, simde_vdup_n_p8(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vceqz_p8 - #define vceqz_p8(a) simde_vceqz_p8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vceqzq_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzq_p8(a); - #else - return simde_vceqq_p8(a, simde_vdupq_n_p8(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vceqzq_p8 - #define vceqzq_p8(a) simde_vceqzq_p8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vceqz_p64(simde_poly64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqz_p64(a); - #else - return simde_vceq_p64(a, simde_vdup_n_p64(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vceqz_p64 - #define vceqz_p64(a) simde_vceqz_p64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vceqzq_p64(simde_poly64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vceqzq_p64(a); - #else - return simde_vceqq_p64(a, simde_vdupq_n_p64(0)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vceqzq_p64 - #define vceqzq_p64(a) simde_vceqzq_p64((a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CEQZ_H) */ -/* :: End simde/arm/neon/ceqz.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cgez.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, 
subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_CGEZ_H) -#define SIMDE_ARM_NEON_CGEZ_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcgezd_f64(simde_float64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint64_t, vcgezd_f64(a)); - #else - return (a >= SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgezd_f64 - #define vcgezd_f64(a) simde_vcgezd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcgezd_s64(int64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint64_t, vcgezd_s64(a)); - #else - return (a >= 0) ? UINT64_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgezd_s64 - #define vcgezd_s64(a) simde_vcgezd_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vcgezs_f32(simde_float32_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint32_t, vcgezs_f32(a)); - #else - return (a >= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgezs_f32 - #define vcgezs_f32(a) simde_vcgezs_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vcgezh_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return HEDLEY_STATIC_CAST(uint16_t, vcgezh_f16(a)); - #else - return (simde_float16_to_float32(a) >= SIMDE_FLOAT32_C(0.0)) ? 
UINT16_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgezh_f16 - #define vcgezh_f16(a) simde_vcgezh_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vcgezq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcgezq_f16(a); - #else - simde_float16x8_private a_ = simde_float16x8_to_private(a); - simde_uint16x8_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgezh_f16(a_.values[i]); - } - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcgezq_f16 - #define vcgezq_f16(a) simde_vcgezq_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcgezq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgezq_f32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgeq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))); - #else - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_uint32x4_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT32_C(0.0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgezs_f32(a_.values[i]); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgezq_f32 - #define vcgezq_f32(a) simde_vcgezq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcgezq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgezq_f64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgeq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0))); - #else - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_uint64x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT64_C(0.0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgezd_f64(a_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgezq_f64 - #define vcgezq_f64(a) simde_vcgezq_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vcgezq_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgezq_s8(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgeq_s8(a, simde_vdupq_n_s8(0)); - #else - simde_int8x16_private a_ = simde_int8x16_to_private(a); - simde_uint8x16_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= 0) ? 
UINT8_MAX : 0; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgezq_s8 - #define vcgezq_s8(a) simde_vcgezq_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vcgezq_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgezq_s16(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgeq_s16(a, simde_vdupq_n_s16(0)); - #else - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_uint16x8_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= 0) ? UINT16_MAX : 0; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgezq_s16 - #define vcgezq_s16(a) simde_vcgezq_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcgezq_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgezq_s32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgeq_s32(a, simde_vdupq_n_s32(0)); - #else - simde_int32x4_private a_ = simde_int32x4_to_private(a); - simde_uint32x4_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= 0) ? UINT32_MAX : 0; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgezq_s32 - #define vcgezq_s32(a) simde_vcgezq_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcgezq_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgezq_s64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgeq_s64(a, simde_vdupq_n_s64(0)); - #else - simde_int64x2_private a_ = simde_int64x2_to_private(a); - simde_uint64x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgezd_s64(a_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgezq_s64 - #define vcgezq_s64(a) simde_vcgezq_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vcgez_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcgez_f16(a); - #else - simde_float16x4_private a_ = simde_float16x4_to_private(a); - simde_uint16x4_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgezh_f16(a_.values[i]); - } - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcgez_f16 - #define vcgez_f16(a) simde_vcgez_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vcgez_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgez_f32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcge_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))); - #else - 
simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_uint32x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT32_C(0.0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgezs_f32(a_.values[i]); - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgez_f32 - #define vcgez_f32(a) simde_vcgez_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vcgez_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgez_f64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcge_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0))); - #else - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_uint64x1_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT64_C(0.0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgezd_f64(a_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgez_f64 - #define vcgez_f64(a) simde_vcgez_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vcgez_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgez_s8(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcge_s8(a, simde_vdup_n_s8(0)); - #else - simde_int8x8_private a_ = simde_int8x8_to_private(a); - simde_uint8x8_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= 0) ? UINT8_MAX : 0; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgez_s8 - #define vcgez_s8(a) simde_vcgez_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vcgez_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgez_s16(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcge_s16(a, simde_vdup_n_s16(0)); - #else - simde_int16x4_private a_ = simde_int16x4_to_private(a); - simde_uint16x4_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= 0) ? 
UINT16_MAX : 0; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgez_s16 - #define vcgez_s16(a) simde_vcgez_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vcgez_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgez_s32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcge_s32(a, simde_vdup_n_s32(0)); - #else - simde_int32x2_private a_ = simde_int32x2_to_private(a); - simde_uint32x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >= 0) ? UINT32_MAX : 0; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgez_s32 - #define vcgez_s32(a) simde_vcgez_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vcgez_s64(simde_int64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgez_s64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcge_s64(a, simde_vdup_n_s64(0)); - #else - simde_int64x1_private a_ = simde_int64x1_to_private(a); - simde_uint64x1_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgezd_s64(a_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgez_s64 - #define vcgez_s64(a) simde_vcgez_s64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CGEZ_H) */ -/* :: End simde/arm/neon/cgez.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cgtz.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_CGTZ_H) -#define SIMDE_ARM_NEON_CGTZ_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcgtzd_s64(int64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint64_t, vcgtzd_s64(a)); - #else - return (a > 0) ? UINT64_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtzd_s64 - #define vcgtzd_s64(a) simde_vcgtzd_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcgtzd_f64(simde_float64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint64_t, vcgtzd_f64(a)); - #else - return (a > SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtzd_f64 - #define vcgtzd_f64(a) simde_vcgtzd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vcgtzh_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return HEDLEY_STATIC_CAST(uint16_t, vcgtzh_f16(a)); - #else - return (simde_float16_to_float32(a) > SIMDE_FLOAT32_C(0.0)) ? UINT16_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtzh_f16 - #define vcgtzh_f16(a) simde_vcgtzh_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vcgtzq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcgtzq_f16(a); - #else - simde_float16x8_private a_ = simde_float16x8_to_private(a); - simde_uint16x8_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgtzh_f16(a_.values[i]); - } - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcgtzq_f16 - #define vcgtzq_f16(a) simde_vcgtzq_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vcgtzs_f32(simde_float32_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint32_t, vcgtzs_f32(a)); - #else - return (a > SIMDE_FLOAT32_C(0.0)) ? 
UINT32_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtzs_f32 - #define vcgtzs_f32(a) simde_vcgtzs_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcgtzq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgtzq_f32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgtq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))); - #else - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_uint32x4_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT32_C(0.0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgtzs_f32(a_.values[i]); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtzq_f32 - #define vcgtzq_f32(a) simde_vcgtzq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcgtzq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgtzq_f64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgtq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0))); - #else - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_uint64x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT64_C(0.0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgtzd_f64(a_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtzq_f64 - #define vcgtzq_f64(a) simde_vcgtzq_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vcgtzq_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgtzq_s8(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgtq_s8(a, simde_vdupq_n_s8(0)); - #else - simde_int8x16_private a_ = simde_int8x16_to_private(a); - simde_uint8x16_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > 0) ? UINT8_MAX : 0; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtzq_s8 - #define vcgtzq_s8(a) simde_vcgtzq_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vcgtzq_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgtzq_s16(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgtq_s16(a, simde_vdupq_n_s16(0)); - #else - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_uint16x8_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > 0) ? 
UINT16_MAX : 0; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtzq_s16 - #define vcgtzq_s16(a) simde_vcgtzq_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcgtzq_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgtzq_s32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgtq_s32(a, simde_vdupq_n_s32(0)); - #else - simde_int32x4_private a_ = simde_int32x4_to_private(a); - simde_uint32x4_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > 0) ? UINT32_MAX : 0; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtzq_s32 - #define vcgtzq_s32(a) simde_vcgtzq_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcgtzq_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgtzq_s64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgtq_s64(a, simde_vdupq_n_s64(0)); - #else - simde_int64x2_private a_ = simde_int64x2_to_private(a); - simde_uint64x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgtzd_s64(a_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtzq_s64 - #define vcgtzq_s64(a) simde_vcgtzq_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vcgtz_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcgtz_f16(a); - #else - simde_float16x4_private a_ = simde_float16x4_to_private(a); - simde_uint16x4_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgtzh_f16(a_.values[i]); - } - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcgtz_f16 - #define vcgtz_f16(a) simde_vcgtz_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vcgtz_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgtz_f32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgt_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))); - #else - simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_uint32x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT32_C(0.0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgtzs_f32(a_.values[i]); - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtz_f32 - #define vcgtz_f32(a) simde_vcgtz_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vcgtz_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgtz_f64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return 
simde_vcgt_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0))); - #else - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_uint64x1_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT64_C(0.0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcgtzd_f64(a_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtz_f64 - #define vcgtz_f64(a) simde_vcgtz_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vcgtz_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgtz_s8(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgt_s8(a, simde_vdup_n_s8(0)); - #else - simde_int8x8_private a_ = simde_int8x8_to_private(a); - simde_uint8x8_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > 0) ? UINT8_MAX : 0; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtz_s8 - #define vcgtz_s8(a) simde_vcgtz_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vcgtz_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgtz_s16(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgt_s16(a, simde_vdup_n_s16(0)); - #else - simde_int16x4_private a_ = simde_int16x4_to_private(a); - simde_uint16x4_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > 0) ? UINT16_MAX : 0; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcgtz_s16 - #define vcgtz_s16(a) simde_vcgtz_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vcgtz_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcgtz_s32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcgt_s32(a, simde_vdup_n_s32(0)); - #else - simde_int32x2_private a_ = simde_int32x2_to_private(a); - simde_uint32x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > 0) ? 
[Bundled SIMDe headers deleted along with the simde dependency: this span removes the auto-generated NEON polyfill sources simde/arm/neon/cgtz.h, cle.h, clez.h, cls.h, clz.h, cltz.h, and clt.h (vector compare-to-zero, compare, and count-leading-zeros emulation), each preceded by a repeated MIT license block and "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY" markers. The deletion is mechanical vendored third-party code; no project code appears in this span.]
UINT8_MAX : 0; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltq_u8 - #define vcltq_u8(a, b) simde_vcltq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vcltq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcltq_u16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmplt(a, b)); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = _mm_andnot_si128( - _mm_cmpeq_epi16(b_.m128i, a_.m128i), - _mm_cmpeq_epi16(_mm_max_epu16(b_.m128i, a_.m128i), b_.m128i) - ); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i sign_bits = _mm_set1_epi16(INT16_MIN); - r_.m128i = _mm_cmplt_epi16(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u16x8_lt(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltq_u16 - #define vcltq_u16(a, b) simde_vcltq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcltq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcltq_u32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a, b)); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = _mm_andnot_si128( - _mm_cmpeq_epi32(b_.m128i, a_.m128i), - _mm_cmpeq_epi32(_mm_max_epu32(b_.m128i, a_.m128i), b_.m128i) - ); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i sign_bits = _mm_set1_epi32(INT32_MIN); - r_.m128i = _mm_cmplt_epi32(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u32x4_lt(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT32_MAX : 0; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltq_u32 - #define vcltq_u32(a, b) simde_vcltq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcltq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcltq_u64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmplt(a, b)); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b); - - #if defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_andnot_si128( - _mm_cmpeq_epi64(b_.m128i, a_.m128i), - _mm_cmpeq_epi64(_mm_max_epu64(b_.m128i, a_.m128i), b_.m128i) - ); - #elif defined(SIMDE_X86_SSE4_2_NATIVE) - __m128i sign_bits = _mm_set1_epi64x(INT64_MIN); - r_.m128i = _mm_cmpgt_epi64(_mm_xor_si128(b_.m128i, sign_bits), _mm_xor_si128(a_.m128i, sign_bits)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcltd_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcltq_u64 - #define vcltq_u64(a, b) simde_vcltq_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vclt_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vclt_f16(a, b); - #else - simde_float16x4_private - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - simde_uint16x4_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vclth_f16(a_.values[i], b_.values[i]); - } - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vclt_f16 - #define vclt_f16(a, b) simde_vclt_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vclt_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vclt_f32(a, b); - #else - simde_float32x2_private - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - simde_uint32x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vclts_f32(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vclt_f32 - #define vclt_f32(a, b) simde_vclt_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vclt_f64(simde_float64x1_t a, simde_float64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vclt_f64(a, b); - #else - simde_float64x1_private - a_ = simde_float64x1_to_private(a), - b_ = simde_float64x1_to_private(b); - simde_uint64x1_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); - #else - SIMDE_VECTORIZE - for 
(size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcltd_f64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vclt_f64 - #define vclt_f64(a, b) simde_vclt_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vclt_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vclt_s8(a, b); - #else - simde_int8x8_private - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - simde_uint8x8_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_cmpgt_pi8(b_.m64, a_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT8_MAX : 0; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vclt_s8 - #define vclt_s8(a, b) simde_vclt_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vclt_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vclt_s16(a, b); - #else - simde_int16x4_private - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - simde_uint16x4_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_cmpgt_pi16(b_.m64, a_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vclt_s16 - #define vclt_s16(a, b) simde_vclt_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vclt_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vclt_s32(a, b); - #else - simde_int32x2_private - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - simde_uint32x2_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_cmpgt_pi32(b_.m64, a_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT32_MAX : 0; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vclt_s32 - #define vclt_s32(a, b) simde_vclt_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vclt_s64(simde_int64x1_t a, simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vclt_s64(a, b); - #else - simde_int64x1_private - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b); - simde_uint64x1_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcltd_s64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vclt_s64 - #define vclt_s64(a, b) simde_vclt_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vclt_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vclt_u8(a, b); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - __m64 sign_bits = _mm_set1_pi8(INT8_MIN); - r_.m64 = _mm_cmpgt_pi8(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT8_MAX : 0; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vclt_u8 - #define vclt_u8(a, b) simde_vclt_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vclt_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vclt_u16(a, b); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - __m64 sign_bits = _mm_set1_pi16(INT16_MIN); - r_.m64 = _mm_cmpgt_pi16(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT16_MAX : 0; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vclt_u16 - #define vclt_u16(a, b) simde_vclt_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vclt_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vclt_u32(a, b); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - __m64 sign_bits = _mm_set1_pi32(INT32_MIN); - r_.m64 = _mm_cmpgt_pi32(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT32_MAX : 0; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vclt_u32 - #define vclt_u32(a, b) simde_vclt_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vclt_u64(simde_uint64x1_t a, simde_uint64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vclt_u64(a, b); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcltd_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vclt_u64 - #define vclt_u64(a, b) simde_vclt_u64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CLT_H) */ -/* :: End simde/arm/neon/clt.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcltzd_s64(int64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint64_t, vcltzd_s64(a)); - #else - return (a < 0) ? UINT64_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcltzd_s64 - #define vcltzd_s64(a) simde_vcltzd_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcltzd_f64(simde_float64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint64_t, vcltzd_f64(a)); - #else - return (a < SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcltzd_f64 - #define vcltzd_f64(a) simde_vcltzd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vcltzs_f32(simde_float32_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint32_t, vcltzs_f32(a)); - #else - return (a < SIMDE_FLOAT32_C(0.0)) ? 
UINT32_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcltzs_f32 - #define vcltzs_f32(a) simde_vcltzs_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vcltzh_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return HEDLEY_STATIC_CAST(uint16_t, vcltzh_f16(a)); - #else - return (simde_float16_to_float32(a) < SIMDE_FLOAT32_C(0.0)) ? UINT16_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcltzh_f16 - #define vcltzh_f16(a) simde_vcltzh_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vcltz_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcltz_f16(a); - #else - simde_float16x4_private a_ = simde_float16x4_to_private(a); - simde_uint16x4_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcltzh_f16(a_.values[i]); - } - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltz_f16 - #define vcltz_f16(a) simde_vcltz_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vcltz_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcltz_f32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vclt_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))); - #else - simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_uint32x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT32_C(0.0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < SIMDE_FLOAT32_C(0.0)) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltz_f32 - #define vcltz_f32(a) simde_vcltz_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vcltz_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcltz_f64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vclt_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0))); - #else - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_uint64x1_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT64_C(0.0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < SIMDE_FLOAT64_C(0.0)) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcltz_f64 - #define vcltz_f64(a) simde_vcltz_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vcltz_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcltz_s8(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vclt_s8(a, simde_vdup_n_s8(0)); - #else - return simde_vreinterpret_u8_s8(simde_vshr_n_s8(a, 7)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltz_s8 - #define vcltz_s8(a) simde_vcltz_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vcltz_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcltz_s16(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vclt_s16(a, simde_vdup_n_s16(0)); - #else - return simde_vreinterpret_u16_s16(simde_vshr_n_s16(a, 15)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltz_s16 - #define vcltz_s16(a) simde_vcltz_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vcltz_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcltz_s32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vclt_s32(a, simde_vdup_n_s32(0)); - #else - return simde_vreinterpret_u32_s32(simde_vshr_n_s32(a, 31)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltz_s32 - #define vcltz_s32(a) simde_vcltz_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vcltz_s64(simde_int64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcltz_s64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vclt_s64(a, simde_vdup_n_s64(0)); - #else - return simde_vreinterpret_u64_s64(simde_vshr_n_s64(a, 63)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltz_s64 - #define vcltz_s64(a) simde_vcltz_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vcltzq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcltzq_f16(a); - #else - simde_float16x8_private a_ = simde_float16x8_to_private(a); - simde_uint16x8_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcltzh_f16(a_.values[i]); - } - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcltzq_f16 - #define vcltzq_f16(a) simde_vcltzq_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcltzq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcltzq_f32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcltq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))); - #else - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_uint32x4_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT32_C(0.0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < SIMDE_FLOAT32_C(0.0)) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltzq_f32 - #define vcltzq_f32(a) simde_vcltzq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcltzq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcltzq_f64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcltq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0))); - #else - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_uint64x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT64_C(0.0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < SIMDE_FLOAT64_C(0.0)) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcltzq_f64 - #define vcltzq_f64(a) simde_vcltzq_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vcltzq_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcltzq_s8(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcltq_s8(a, simde_vdupq_n_s8(0)); - #else - return simde_vreinterpretq_u8_s8(simde_vshrq_n_s8(a, 7)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltzq_s8 - #define vcltzq_s8(a) simde_vcltzq_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vcltzq_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcltzq_s16(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcltq_s16(a, simde_vdupq_n_s16(0)); - #else - return simde_vreinterpretq_u16_s16(simde_vshrq_n_s16(a, 15)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltzq_s16 - #define vcltzq_s16(a) simde_vcltzq_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcltzq_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcltzq_s32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcltq_s32(a, simde_vdupq_n_s32(0)); - #else - return simde_vreinterpretq_u32_s32(simde_vshrq_n_s32(a, 31)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltzq_s32 - #define vcltzq_s32(a) simde_vcltzq_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcltzq_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcltzq_s64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vcltq_s64(a, simde_vdupq_n_s64(0)); - #else - return simde_vreinterpretq_u64_s64(simde_vshrq_n_s64(a, 63)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcltzq_s64 - #define vcltzq_s64(a) simde_vcltzq_s64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CLTZ_H) */ -/* :: End simde/arm/neon/cltz.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mvn.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including 
without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_MVN_H) -#define SIMDE_ARM_NEON_MVN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vmvnq_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvnq_s8(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_nor(a, a); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a); - - #if defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi8(a_.m128i, a_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_not(a_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = ~a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvnq_s8 - #define vmvnq_s8(a) simde_vmvnq_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vmvnq_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvnq_s16(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_nor(a, a); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a); - - #if defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi16(a_.m128i, a_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_not(a_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = ~a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvnq_s16 - #define vmvnq_s16(a) simde_vmvnq_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vmvnq_s32(simde_int32x4_t 
a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvnq_s32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_nor(a, a); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a); - - #if defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi32(a_.m128i, a_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_not(a_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = ~a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvnq_s32 - #define vmvnq_s32(a) simde_vmvnq_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vmvnq_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvnq_u8(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_nor(a, a); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a); - - #if defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi8(a_.m128i, a_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_not(a_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = ~a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvnq_u8 - #define vmvnq_u8(a) simde_vmvnq_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vmvnq_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvnq_u16(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_nor(a, a); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a); - - #if defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi16(a_.m128i, a_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_not(a_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = ~a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvnq_u16 - #define vmvnq_u16(a) simde_vmvnq_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vmvnq_u32(simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvnq_u32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_nor(a, a); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a); - - #if defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi32(a_.m128i, a_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = 
wasm_v128_not(a_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = ~a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvnq_u32 - #define vmvnq_u32(a) simde_vmvnq_u32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vmvn_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvn_s8(a); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi8(a_.m64, a_.m64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = ~a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvn_s8 - #define vmvn_s8(a) simde_vmvn_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vmvn_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvn_s16(a); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi16(a_.m64, a_.m64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = ~a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvn_s16 - #define vmvn_s16(a) simde_vmvn_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vmvn_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvn_s32(a); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi32(a_.m64, a_.m64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = ~a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvn_s32 - #define vmvn_s32(a) simde_vmvn_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vmvn_u8(simde_uint8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvn_u8(a); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi8(a_.m64, a_.m64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = ~a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvn_u8 - #define vmvn_u8(a) simde_vmvn_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vmvn_u16(simde_uint16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvn_u16(a); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a); - - #if 
defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi16(a_.m64, a_.m64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = ~a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvn_u16 - #define vmvn_u16(a) simde_vmvn_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vmvn_u32(simde_uint32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvn_u32(a); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi32(a_.m64, a_.m64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = ~a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvn_u32 - #define vmvn_u32(a) simde_vmvn_u32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vmvn_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvn_p8(a); - #else - simde_poly8x8_private - r_, - a_ = simde_poly8x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvn_p8 - #define vmvn_p8(a) simde_vmvn_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vmvnq_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmvnq_p8(a); - #else - simde_poly8x16_private - r_, - a_ = simde_poly8x16_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ~(a_.values[i]); - } - - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmvnq_p8 - #define vmvnq_p8(a) simde_vmvnq_p8(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MVN_H) */ -/* :: End simde/arm/neon/mvn.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vcls_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcls_s8(a); - #else - return simde_vsub_s8(simde_vclz_s8(simde_vbsl_s8(simde_vcltz_s8(a), simde_vmvn_s8(a), a)), simde_vdup_n_s8(INT8_C(1))); - #endif -} -#define simde_vcls_u8(a) simde_vcls_s8(simde_vreinterpret_s8_u8(a)) -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcls_s8 - #define vcls_s8(a) simde_vcls_s8(a) - #undef vcls_u8 - #define vcls_u8(a) simde_vcls_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vcls_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcls_s16(a); - #else - return simde_vsub_s16(simde_vclz_s16(simde_vbsl_s16(simde_vcltz_s16(a), simde_vmvn_s16(a), a)), simde_vdup_n_s16(INT16_C(1))); - #endif -} -#define 
simde_vcls_u16(a) simde_vcls_s16(simde_vreinterpret_s16_u16(a)) -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcls_s16 - #define vcls_s16(a) simde_vcls_s16(a) - #undef vcls_u16 - #define vcls_u16(a) simde_vcls_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vcls_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vcls_s32(a); - #else - return simde_vsub_s32(simde_vclz_s32(simde_vbsl_s32(simde_vcltz_s32(a), simde_vmvn_s32(a), a)), simde_vdup_n_s32(INT32_C(1))); - #endif -} -#define simde_vcls_u32(a) simde_vcls_s32(simde_vreinterpret_s32_u32(a)) -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcls_s32 - #define vcls_s32(a) simde_vcls_s32(a) - #undef vcls_u32 - #define vcls_u32(a) simde_vcls_u32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vclsq_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vclsq_s8(a); - #else - return simde_vsubq_s8(simde_vclzq_s8(simde_vbslq_s8(simde_vcltzq_s8(a), simde_vmvnq_s8(a), a)), simde_vdupq_n_s8(INT8_C(1))); - #endif -} -#define simde_vclsq_u8(a) simde_vclsq_s8(simde_vreinterpretq_s8_u8(a)) -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vclsq_s8 - #define vclsq_s8(a) simde_vclsq_s8(a) - #undef vclsq_u8 - #define vclsq_u8(a) simde_vclsq_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vclsq_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vclsq_s16(a); - #else - return simde_vsubq_s16(simde_vclzq_s16(simde_vbslq_s16(simde_vcltzq_s16(a), simde_vmvnq_s16(a), a)), simde_vdupq_n_s16(INT16_C(1))); - #endif -} -#define simde_vclsq_u16(a) simde_vclsq_s16(simde_vreinterpretq_s16_u16(a)) -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vclsq_s16 - #define vclsq_s16(a) simde_vclsq_s16(a) - #undef vclsq_u16 - #define vclsq_u16(a) simde_vclsq_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vclsq_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vclsq_s32(a); - #else - return simde_vsubq_s32(simde_vclzq_s32(simde_vbslq_s32(simde_vcltzq_s32(a), simde_vmvnq_s32(a), a)), simde_vdupq_n_s32(INT32_C(1))); - #endif -} -#define simde_vclsq_u32(a) simde_vclsq_s32(simde_vreinterpretq_s32_u32(a)) -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vclsq_s32 - #define vclsq_s32(a) simde_vclsq_s32(a) - #undef vclsq_u32 - #define vclsq_u32(a) simde_vclsq_u32(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CLS_H) */ -/* :: End simde/arm/neon/cls.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cmla.h :: */ -/* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, copy, -* modify, merge, publish, distribute, sublicense, and/or sell copies -* of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above 
copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -* -* Copyright: -* 2021 Atharva Nimbalkar -* 2023 Yi-Yen Chung (Copyright owned by Andes Technology) -*/ - -#if !defined(SIMDE_ARM_NEON_CMLA_H) -#define SIMDE_ARM_NEON_CMLA_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,5,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) && \ - defined(SIMDE_ARM_NEON_FP16) && defined(__ARM_FEATURE_COMPLEX) - return vcmla_f16(r, a, b); - #else - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0]) / 2) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[2 * i]) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i])); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_f16 - #define vcmla_f16(r, a, b) simde_vcmla_f16(r, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) && \ - defined(__ARM_FEATURE_COMPLEX) - return vcmla_f32(r, a, b); - #else - simde_float32x2_private - r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] += b_.values[i] * a_.values[i & 2]; - } - #endif - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_f32 - #define vcmla_f32(r, a, b) simde_vcmla_f32(r, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - 
- [Deleted vendored SIMDE amalgamation sections (auto-generated, revision 589c7d599ae2213823acc4334a3ae8ef8caefe18): simde/arm/neon/cmla.h, simde/arm/neon/cmla_lane.h, and simde/arm/neon/cvt.h; MIT-licensed, copyright Evan Nemerson, Sean Maher, Chi-Wei Chu, and Yi-Yen Chung (Andes Technology).]
r_.values[i] = simde_bfloat16_to_float32(a_.values[i]); - } - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtq_low_f32_bf16 - #define vcvtq_low_f32_bf16(a) simde_vcvtq_low_f32_bf16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcvtq_high_f32_bf16(simde_bfloat16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vcvtq_high_f32_bf16(a); - #else - simde_bfloat16x8_private a_ = simde_bfloat16x8_to_private(a); - simde_float32x4_private r_; - - size_t rsize = (sizeof(r_.values) / sizeof(r_.values[0])); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_bfloat16_to_float32(a_.values[i + rsize]); - } - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtq_high_f32_bf16 - #define vcvtq_high_f32_bf16(a) simde_vcvtq_high_f32_bf16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8_t -simde_vcvtq_low_bf16_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vcvtq_low_bf16_f32(a); - #else - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_bfloat16x8_private r_; - - size_t asize = (sizeof(a_.values) / sizeof(a_.values[0])); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < asize; i++) { - r_.values[i] = simde_bfloat16_from_float32(a_.values[i]); - r_.values[i + asize] = SIMDE_BFLOAT16_VALUE(0.0); - } - - return simde_bfloat16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtq_low_bf16_f32 - #define vcvtq_low_bf16_f32(a) simde_vcvtq_low_bf16_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8_t -simde_vcvtq_high_bf16_f32(simde_bfloat16x8_t inactive, simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vcvtq_high_bf16_f32(inactive, a); - #else - simde_bfloat16x8_private inactive_ = simde_bfloat16x8_to_private(inactive); - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_bfloat16x8_private r_; - - size_t asize = (sizeof(a_.values) / sizeof(a_.values[0])); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r_.values[i] = inactive_.values[i]; - r_.values[i + asize] = simde_bfloat16_from_float32(a_.values[i]); - } - return simde_bfloat16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtq_high_bf16_f32 - #define vcvtq_high_bf16_f32(inactive, a) simde_vcvtq_high_bf16_f32((inactive), (a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_NEON_CVT_H */ -/* :: End simde/arm/neon/cvt.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/dup_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in 
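For readers unfamiliar with SIMDE's layout, here is a minimal sketch of the two-branch dispatch shape summarized above, modeled on one of the deleted conversions (vcvta_s32_f32, round to nearest with ties away from zero). The struct types, guard macros, and the sketch_ function name are simplified stand-ins introduced for illustration, not SIMDE's own definitions or part of this PR.

// Illustrative sketch only: the dispatch shape of the deleted conversion
// polyfills, with simplified stand-in types (real SIMDE wraps the lanes in
// a private union type).
#include <cstddef>
#include <cstdint>
#if defined(__ARM_NEON) && defined(__aarch64__)
#include <arm_neon.h>
#endif

struct f32x2 { float   values[2]; };
struct s32x2 { int32_t values[2]; };

// Counterpart of simde_vcvta_s32_f32: float lanes -> int32 lanes,
// rounding to nearest with ties away from zero.
static inline s32x2 sketch_vcvta_s32_f32(f32x2 a) {
  s32x2 r;
#if defined(__ARM_NEON) && defined(__aarch64__)
  // Native branch: one FCVTAS instruction covers both lanes.
  vst1_s32(r.values, vcvta_s32_f32(vld1_f32(a.values)));
#else
  // Portable branch: per-lane scalar loop, mirroring the deleted fallback.
  for (size_t i = 0; i < sizeof(r.values) / sizeof(r.values[0]); i++) {
    float v = a.values[i];
    r.values[i] = static_cast<int32_t>(v >= 0.0f ? v + 0.5f : v - 0.5f);
  }
#endif
  return r;
}

The lane count is derived from sizeof(values), matching the sizeof-based loop bounds used throughout the deleted code.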
[Deleted vendored header, collapsed for readability: the MIT license notice and lane-dup polyfills of the auto-generated simde/arm/neon/dup_lane.h. This span defines the lane-extraction scalars simde_vdups_/vdupd_/vdupb_/vduph_(lane|laneq)_* and the lane-broadcast vectors simde_vdup_lane_*, simde_vdup_laneq_*, simde_vdupq_lane_* and simde_vdupq_laneq_* for s8–s64, u8–u64, f16/f32/f64 and poly8/16/64 element types, each guarded by SIMDE_REQUIRE_CONSTANT_RANGE on the lane index. The dispatch mirrors cvt.h: the native vdup*/vdupq* intrinsic when the matching SIMDE_ARM_NEON_*_NATIVE macro is defined, else a SIMDE_SHUFFLE_VECTOR_ or __builtin_shufflevector broadcast where the compiler provides one, else a simde_vdup_n_* splat of the extracted lane; the *_ENABLE_NATIVE_ALIASES blocks restore the unprefixed names.]
-#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vdupq_laneq_p16 - #define vdupq_laneq_p16(vec, lane) simde_vdupq_laneq_p16((vec), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vdupq_laneq_p64(simde_poly64x2_t vec, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - return simde_vdupq_n_p64(simde_poly64x2_to_private(vec).values[lane]); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vdupq_laneq_p64(vec, lane) vdupq_laneq_p64((vec), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vdupq_laneq_p64 - #define vdupq_laneq_p64(vec, lane) simde_vdupq_laneq_p64((vec), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8_t -simde_vdupb_lane_p8(simde_poly8x8_t vec, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_poly8x8_to_private(vec).values[lane]; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_71362) - #define simde_vdupb_lane_p8(vec, lane) vdupb_lane_p8((vec), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vdupb_lane_p8 - #define vdupb_lane_p8(vec, lane) simde_vdupb_lane_p8((vec), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8_t -simde_vdupb_laneq_p8(simde_poly8x16_t vec, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - return simde_poly8x16_to_private(vec).values[lane]; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_71362) - #define simde_vdupb_laneq_p8(vec, lane) vdupb_laneq_p8((vec), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vdupb_laneq_p8 - #define vdupb_laneq_p8(vec, lane) simde_vdupb_laneq_p8((vec), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16_t -simde_vduph_lane_p16(simde_poly16x4_t vec, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_poly16x4_to_private(vec).values[lane]; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_71362) - #define simde_vduph_lane_p16(vec, lane) vduph_lane_p16((vec), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vduph_lane_p16 - #define vduph_lane_p16(vec, lane) simde_vduph_lane_p16((vec), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16_t -simde_vduph_laneq_p16(simde_poly16x8_t vec, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_poly16x8_to_private(vec).values[lane]; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_71362) - #define simde_vduph_laneq_p16(vec, lane) vduph_laneq_p16((vec), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vduph_laneq_p16 - #define vduph_laneq_p16(vec, lane) simde_vduph_laneq_p16((vec), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16_t -simde_vduph_lane_bf16(simde_bfloat16x4_t vec, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_bfloat16x4_to_private(vec).values[lane]; -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vduph_lane_bf16(vec, lane) vduph_lane_bf16(vec, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vduph_lane_bf16 - #define vduph_lane_bf16(vec, lane) simde_vduph_lane_bf16((vec), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16_t -simde_vduph_laneq_bf16(simde_bfloat16x8_t vec, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_bfloat16x8_to_private(vec).values[lane]; -} -#if 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vduph_laneq_bf16(vec, lane) vduph_laneq_bf16(vec, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vduph_laneq_bf16 - #define vduph_laneq_bf16(vec, lane) simde_vduph_laneq_bf16((vec), (lane)) -#endif - -// simde_vdup_lane_bf16 -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vdup_lane_bf16(vec, lane) vdup_lane_bf16(vec, lane) -#else - #define simde_vdup_lane_bf16(vec, lane) simde_vdup_n_bf16(simde_vduph_lane_bf16(vec, lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdup_lane_bf16 - #define vdup_lane_bf16(vec, lane) simde_vdup_lane_bf16((vec), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x4_t -simde_vdup_laneq_bf16(simde_bfloat16x8_t vec, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_vdup_n_bf16(simde_bfloat16x8_to_private(vec).values[lane]); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) -#define simde_vdup_laneq_bf16(vec, lane) vdup_laneq_bf16(vec, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdup_laneq_bf16 - #define vdup_laneq_bf16(vec, lane) simde_vdup_laneq_bf16((vec), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8_t -simde_vdupq_lane_bf16(simde_bfloat16x4_t vec, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vdupq_n_bf16(simde_bfloat16x4_to_private(vec).values[lane]); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) -#define simde_vdupq_lane_bf16(vec, lane) vdupq_lane_bf16(vec, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdupq_lane_bf16 - #define vdupq_lane_bf16(vec, lane) simde_vdupq_lane_bf16((vec), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8_t -simde_vdupq_laneq_bf16(simde_bfloat16x8_t vec, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_vdupq_n_bf16(simde_bfloat16x8_to_private(vec).values[lane]); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vdupq_laneq_bf16(vec, lane) vdupq_laneq_bf16(vec, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdupq_laneq_bf16 - #define vdupq_laneq_bf16(vec, lane) simde_vdupq_laneq_bf16((vec), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_DUP_LANE_H) */ -/* :: End simde/arm/neon/dup_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mul.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
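All of the wrappers deleted here, the dup_lane family above and the mul/cmla families summarized next, share a three-tier shape: forward to the real intrinsic on a genuine ARM build, otherwise take a platform SIMD or compiler-vector path, otherwise fall back to a plain scalar loop. The following is a minimal self-contained sketch of that shape only; the toy_* names are hypothetical and this is not the upstream SIMDE code.

#include <stddef.h>
#if defined(__ARM_NEON)
#include <arm_neon.h>
#endif

/* Toy two-lane float vector standing in for SIMDE's private wrapper types. */
typedef struct { float values[2]; } toy_float32x2_t;

static toy_float32x2_t toy_vmul_f32(toy_float32x2_t a, toy_float32x2_t b) {
  toy_float32x2_t r;
#if defined(__ARM_NEON)
  /* Native tier: hand the work to the real NEON intrinsic. */
  vst1_f32(r.values, vmul_f32(vld1_f32(a.values), vld1_f32(b.values)));
#else
  /* Portable tier: a scalar loop the compiler may auto-vectorize.
   * The real headers insert SSE/WASM/AltiVec tiers between these two. */
  for (size_t i = 0; i < sizeof(r.values) / sizeof(r.values[0]); i++)
    r.values[i] = a.values[i] * b.values[i];
#endif
  return r;
}

int main(void) {
  toy_float32x2_t a = {{1.0f, 2.0f}}, b = {{3.0f, 4.0f}};
  toy_float32x2_t r = toy_vmul_f32(a, b);
  return (r.values[0] == 3.0f && r.values[1] == 8.0f) ? 0 : 1;  /* 0 on success */
}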
[Deleted vendored code, continued: the body of simde/arm/neon/mul.h, which provided the element-wise multiply polyfills (simde_vmulh_f16, simde_vmul(q)_{f16,f32,f64,s8,s16,s32,u8,u16,u32}, the internal simde_x_vmul(q)_{s64,u64} helpers, and the carry-less polynomial multiplies simde_vmul(q)_p8), each dispatching to native NEON, SSE/SSE2 (_mm_mul_ps, _mm_mul_pd, _mm_mullo_epi16 and a widening 8-bit trick), WASM SIMD128, AltiVec, compiler vector extensions, or a plain scalar loop. The deletions then run through simde/arm/neon/cmla_lane.h (complex multiply-accumulate with a broadcast lane: simde_vcmla(q)_lane(q)_{f16,f32}), simde/arm/neon/cmla_rot180.h (simde_vcmla(q)_rot180_{f16,f32} and simde_vcmlaq_rot180_f64, whose scalar fallback accumulates r[2i] += -b[2i] * a[2i] and r[2i+1] += -b[2i+1] * a[2i]), and the opening of simde/arm/neon/cmla_rot180_lane.h (the lane and laneq forms of the rot180 polyfills); the remaining hunks of that header, with the same interleaved MIT license text and "AUTOMATICALLY GENERATED FILE" markers, continue below.]
b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmla_rot180_laneq_f32(r, a, b, lane) vcmla_rot180_laneq_f32(r, a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, - const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) -{ - simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), - a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), - r_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), - a_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 0, 0, 2, 2); - a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); - r_low.values += b_.values * a_low.values; - r_high.values += b_.values * a_high.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) - { - r_low.values[2 * i] += -(b_.values[2 * i]) * a_low.values[2 * i]; - r_low.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_low.values[2 * i]; - r_high.values[2 * i] += -(b_.values[2 * i]) * a_high.values[2 * i]; - r_high.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_high.values[2 * i]; - } - #endif - return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot180_laneq_f16 - #define vcmlaq_rot180_laneq_f16(r, a, b, lane) simde_vcmlaq_rot180_laneq_f16(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmlaq_rot180_laneq_f16(r, a, b, lane) vcmlaq_rot180_laneq_f16(r, a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcmlaq_rot180_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, - const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) -{ - simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; - } - #endif - return 
simde_float32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot180_laneq_f32 - #define vcmlaq_rot180_laneq_f32(r, a, b, lane) simde_vcmlaq_rot180_laneq_f32(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmlaq_rot180_laneq_f32(r, a, b, lane) vcmlaq_rot180_laneq_f32(r, a, b, lane) -#endif -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT180_LANE_H) */ -/* :: End simde/arm/neon/cmla_rot180_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cmla_rot270.h :: */ -/* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, copy, -* modify, merge, publish, distribute, sublicense, and/or sell copies -* of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-* -* Copyright: -* 2021 Atharva Nimbalkar -* 2023 Yi-Yen Chung (Copyright owned by Andes Technology) -*/ - -#if !defined(SIMDE_ARM_NEON_CMLA_ROT270_H) -#define SIMDE_ARM_NEON_CMLA_ROT270_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot270_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,5,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) && \ - defined(SIMDE_ARM_NEON_FP16) && defined(__ARM_FEATURE_COMPLEX) - return vcmla_rot270_f16(r, a, b); - #else - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) - - simde_float16_to_float32(b_.values[2 * i]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot270_f16 - #define vcmla_rot270_f16(r, a, b) simde_vcmla_rot270_f16(r, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot270_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,5,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) && \ - defined(SIMDE_ARM_NEON_FP16) && defined(__ARM_FEATURE_COMPLEX) - return vcmlaq_rot270_f16(r, a, b); - #else - simde_float16x8_private - r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) - - simde_float16_to_float32(b_.values[2 * i]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot270_f16 - #define vcmlaq_rot270_f16(r, a, b) simde_vcmlaq_rot270_f16(r, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot270_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) && \ - defined(__ARM_FEATURE_COMPLEX) - return vcmla_rot270_f32(r, a, b); - #else - simde_float32x2_private - r_ = simde_float32x2_to_private(r), - a_ = 
simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot270_f32 - #define vcmla_rot270_f32(r, a, b) simde_vcmla_rot270_f32(r, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot270_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) && \ - defined(__ARM_FEATURE_COMPLEX) - return vcmlaq_rot270_f32(r, a, b); - #else - simde_float32x4_private - r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - a_.v128 = wasm_i32x4_shuffle(a_.v128, a_.v128, 1, 1, 3, 3); - b_.v128 = wasm_i32x4_shuffle(wasm_f32x4_neg(b_.v128), b_.v128, 5, 0, 7, 2); - r_.v128 = wasm_f32x4_add(r_.v128, wasm_f32x4_mul(b_.v128, a_.v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot270_f32 - #define vcmlaq_rot270_f32(r, a, b) simde_vcmlaq_rot270_f32(r, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vcmlaq_rot270_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) && \ - defined(__ARM_FEATURE_COMPLEX) - return vcmlaq_rot270_f64(r, a, b); - #else - simde_float64x2_private - r_ = simde_float64x2_to_private(r), - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - a_.v128 = wasm_i64x2_shuffle(a_.v128, a_.v128, 1, 1); - b_.v128 = wasm_i64x2_shuffle(wasm_f64x2_neg(b_.v128), b_.v128, 3, 0); - r_.v128 = wasm_f64x2_add(r_.v128, wasm_f64x2_mul(b_.v128, a_.v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 3, 0); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * 
a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot270_f64 - #define vcmlaq_rot270_f64(r, a, b) simde_vcmlaq_rot270_f64(r, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT270_H) */ -/* :: End simde/arm/neon/cmla_rot270.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cmla_rot270_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Chi-Wei Chu - */ - -#if !defined(SIMDE_ARM_NEON_CMLA_ROT270_LANE_H) -#define SIMDE_ARM_NEON_CMLA_ROT270_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcmla_rot270_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) -{ - simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 
0, 7, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif - return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot270_lane_f16 - #define vcmla_rot270_lane_f16(r, a, b, lane) simde_vcmla_rot270_lane_f16(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmla_rot270_lane_f16(r, a, b, lane) vcmla_rot270_lane_f16(r, a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcmla_rot270_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) -{ - simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif - return simde_float32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot270_lane_f32 - #define vcmla_rot270_lane_f32(r, a, b, lane) simde_vcmla_rot270_lane_f32(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmla_rot270_lane_f32(r, a, b, lane) vcmla_rot270_lane_f32(r, a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) -{ - simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), - a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), - r_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), - a_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3); - a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_low.values += b_.values * a_low.values; - r_high.values += b_.values * a_high.values; - #else - 
SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) - { - r_low.values[2 * i] += b_.values[2 * i + 1] * a_low.values[2 * i + 1]; - r_low.values[2 * i + 1] += -(b_.values[2 * i]) * a_low.values[2 * i + 1]; - r_high.values[2 * i] += b_.values[2 * i + 1] * a_high.values[2 * i + 1]; - r_high.values[2 * i + 1] += -(b_.values[2 * i]) * a_high.values[2 * i + 1]; - } - #endif - return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot270_lane_f16 - #define vcmlaq_rot270_lane_f16(r, a, b, lane) simde_vcmlaq_rot270_lane_f16(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmlaq_rot270_lane_f16(r, a, b, lane) vcmlaq_rot270_lane_f16(r, a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcmlaq_rot270_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) -{ - simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x2_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif - return simde_float32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot270_lane_f32 - #define vcmlaq_rot270_lane_f32(r, a, b, lane) simde_vcmlaq_rot270_lane_f32(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmlaq_rot270_lane_f32(r, a, b, lane) vcmlaq_rot270_lane_f32(r, a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) -{ - simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] += b_.values[2 * i + 1] * 
a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif - return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot270_laneq_f16 - #define vcmla_rot270_laneq_f16(r, a, b, lane) simde_vcmla_rot270_laneq_f16(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmla_rot270_laneq_f16(r, a, b, lane) vcmla_rot270_laneq_f16(r, a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcmla_rot270_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) -{ - simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif - return simde_float32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot270_laneq_f32 - #define vcmla_rot270_laneq_f32(r, a, b, lane) simde_vcmla_rot270_laneq_f32(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmla_rot270_laneq_f32(r, a, b, lane) vcmla_rot270_laneq_f32(r, a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, - const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) -{ - simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), - a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), - r_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), - a_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3); - a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_high.values += b_.values * a_high.values; - r_low.values += b_.values * a_low.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) - { - r_low.values[2 * i] += b_.values[2 * i + 1] * a_low.values[2 * i + 1]; - r_low.values[2 
* i + 1] += -(b_.values[2 * i]) * a_low.values[2 * i + 1]; - r_high.values[2 * i] += b_.values[2 * i + 1] * a_high.values[2 * i + 1]; - r_high.values[2 * i + 1] += -(b_.values[2 * i]) * a_high.values[2 * i + 1]; - } - #endif - return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot270_laneq_f16 - #define vcmlaq_rot270_laneq_f16(r, a, b, lane) simde_vcmlaq_rot270_laneq_f16(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmlaq_rot270_laneq_f16(r, a, b, lane) vcmlaq_rot270_laneq_f16(r, a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcmlaq_rot270_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, - const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) -{ - simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif - return simde_float32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot270_laneq_f32 - #define vcmlaq_rot270_laneq_f32(r, a, b, lane) simde_vcmlaq_rot270_laneq_f32(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmlaq_rot270_laneq_f32(r, a, b, lane) vcmlaq_rot270_laneq_f32(r, a, b, lane) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT270_LANE_H) */ -/* :: End simde/arm/neon/cmla_rot270_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cmla_rot90.h :: */ -/* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, copy, -* modify, merge, publish, distribute, sublicense, and/or sell copies -* of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -* -* Copyright: -* 2021 Atharva Nimbalkar -* 2023 Yi-Yen Chung (Copyright owned by Andes Technology) -*/ - -#if !defined(SIMDE_ARM_NEON_CMLA_ROT90_H) -#define SIMDE_ARM_NEON_CMLA_ROT90_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot90_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,5,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) && \ - defined(SIMDE_ARM_NEON_FP16) && defined(__ARM_FEATURE_COMPLEX) - return vcmla_rot90_f16(r, a, b); - #else - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) - - simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - simde_float16_to_float32(b_.values[2 * i]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot90_f16 - #define vcmla_rot90_f16(r, a, b) simde_vcmla_rot90_f16(r, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot90_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,5,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) && \ - defined(SIMDE_ARM_NEON_FP16) && defined(__ARM_FEATURE_COMPLEX) - return vcmlaq_rot90_f16(r, a, b); - #else - simde_float16x8_private - r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) - - simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - simde_float16_to_float32(b_.values[2 * i]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot90_f16 - #define vcmlaq_rot90_f16(r, a, b) simde_vcmlaq_rot90_f16(r, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot90_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || 
HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) && \ - defined(__ARM_FEATURE_COMPLEX) - return vcmla_rot90_f32(r, a, b); - #else - simde_float32x2_private - r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot90_f32 - #define vcmla_rot90_f32(r, a, b) simde_vcmla_rot90_f32(r, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot90_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) && \ - defined(__ARM_FEATURE_COMPLEX) - return vcmlaq_rot90_f32(r, a, b); - #else - simde_float32x4_private - r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - a_.v128 = wasm_i32x4_shuffle(a_.v128, a_.v128, 1, 1, 3, 3); - b_.v128 = wasm_i32x4_shuffle(wasm_f32x4_neg(b_.v128), b_.v128, 1, 4, 3, 6); - r_.v128 = wasm_f32x4_add(r_.v128, wasm_f32x4_mul(b_.v128, a_.v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot90_f32 - #define vcmlaq_rot90_f32(r, a, b) simde_vcmlaq_rot90_f32(r, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vcmlaq_rot90_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) && \ - defined(__ARM_FEATURE_COMPLEX) - return vcmlaq_rot90_f64(r, a, b); - #else - simde_float64x2_private - r_ = simde_float64x2_to_private(r), - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - a_.v128 = wasm_i64x2_shuffle(a_.v128, a_.v128, 1, 1); - b_.v128 = wasm_i64x2_shuffle(wasm_f64x2_neg(b_.v128), b_.v128, 1, 2); - r_.v128 = wasm_f64x2_add(r_.v128, wasm_f64x2_mul(b_.v128, a_.v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 1, 1); - b_.values = 
SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 1, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot90_f64 - #define vcmlaq_rot90_f64(r, a, b) simde_vcmlaq_rot90_f64(r, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT90_H) */ -/* :: End simde/arm/neon/cmla_rot90.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cmla_rot90_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Chi-Wei Chu - */ - -#if !defined(SIMDE_ARM_NEON_CMLA_ROT90_LANE_H) -#define SIMDE_ARM_NEON_CMLA_ROT90_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcmla_rot90_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) -{ - simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif - return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot90_lane_f16 - #define vcmla_rot90_lane_f16(r, a, b, lane) simde_vcmla_rot90_lane_f16(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmla_rot90_lane_f16(r, a, b, lane) vcmla_rot90_lane_f16(r, a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcmla_rot90_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) -{ - simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * 
a_.values[2 * i + 1]; - } - #endif - return simde_float32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot90_lane_f32 - #define vcmla_rot90_lane_f32(r, a, b, lane) simde_vcmla_rot90_lane_f32(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmla_rot90_lane_f32(r, a, b, lane) vcmla_rot90_lane_f32(r, a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) -{ - simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif - return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot90_laneq_f16 - #define vcmla_rot90_laneq_f16(r, a, b, lane) simde_vcmla_rot90_laneq_f16(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmla_rot90_laneq_f16(r, a, b, lane) vcmla_rot90_laneq_f16(r, a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcmla_rot90_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) -{ - simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane])); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) - { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif - return simde_float32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot90_laneq_f32 - #define vcmla_rot90_laneq_f32(r, a, b, lane) simde_vcmla_rot90_laneq_f32(r, a, b, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - 
[Deleted hunk elided: remainder of simde/arm/neon/cmla_rot90_lane.h: portable fallbacks and
 native-alias macros for simde_vcmlaq_rot90_lane_f16/f32 and simde_vcmlaq_rot90_laneq_f16/f32,
 the by-lane complex multiply-accumulate with 90-degree rotation.]
[Deleted hunk elided: simde/arm/neon/cnt.h, entire file: per-byte population count
 (simde_vcnt_s8/u8/p8 and simde_vcntq_s8/u8/p8) built on a scalar SWAR helper,
 simde_x_arm_neon_cntb, with SSE2/SSSE3/AVX2/AVX-512 BITALG and POWER AltiVec fast paths.
 Each removed file also carries the SIMDE MIT license header and the repeated
 "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY" marker for commit
 589c7d599ae2213823acc4334a3ae8ef8caefe18.]
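For reference, the scalar fallback in the deleted cnt.h counts bits with the classic
mask-and-add (SWAR) trick. A minimal standalone C++ sketch of that technique follows;
the names are illustrative, not SIMDE's:

    #include <cstdint>
    #include <cassert>

    // SWAR popcount for one byte, as in the removed scalar fallback:
    // fold 2-bit pairs, then nibbles, using masks 0x55, 0x33, 0x0F.
    static inline uint8_t popcount_u8(uint8_t v) {
        v = static_cast<uint8_t>(v - ((v >> 1) & 0x55));           // 2-bit partial counts
        v = static_cast<uint8_t>((v & 0x33) + ((v >> 2) & 0x33));  // 4-bit partial counts
        v = static_cast<uint8_t>((v + (v >> 4)) & 0x0F);           // total in [0, 8]
        return v;
    }

    int main() {
        assert(popcount_u8(0x00) == 0);
        assert(popcount_u8(0xFF) == 8);
        assert(popcount_u8(0xB2) == 4);  // 1011 0010 has four set bits
        return 0;
    }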
[Deleted hunk elided: simde/arm/neon/cvt_n.h, entire file: fixed-point conversions with a
 compile-time fractional-bit count n. Scalar forms (simde_vcvth_n_*, simde_vcvts_n_*,
 simde_vcvtd_n_*) and vector forms (simde_vcvt_n_*, simde_vcvtq_n_*) cover f16/f32/f64 to and
 from s16/s32/s64 and u16/u32/u64. Float-to-integer directions multiply by 2^n before the
 truncating convert; integer-to-float directions divide by 2^n in double precision. Native
 intrinsics are used where available, guarded by SIMDE_ARM_NEON_*_NATIVE and, for f16 types,
 SIMDE_ARM_NEON_FP16.]
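The removed vcvt*_n_* helpers all share one pattern: a power-of-two scale wrapped around an
ordinary convert. A minimal C++ sketch of that pattern, with illustrative names that are not
part of SIMDE or this package:

    #include <cmath>
    #include <cstdint>
    #include <cassert>

    // Fixed-point encode/decode with n fractional bits: multiply by 2^n and
    // truncate on the way in, divide by 2^n (in double) on the way out.
    static int32_t to_fixed_s32(float a, int n) {
        return static_cast<int32_t>(a * std::ldexp(1.0f, n));
    }
    static float from_fixed_s32(int32_t a, int n) {
        return static_cast<float>(static_cast<double>(a) / std::ldexp(1.0, n));
    }

    int main() {
        assert(to_fixed_s32(1.5f, 8) == 384);    // 1.5 * 2^8
        assert(from_fixed_s32(384, 8) == 1.5f);  // 384 / 2^8
        return 0;
    }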
[Deleted hunk elided: simde/arm/neon/cvtm.h, entire file: round-toward-minus-infinity (floor)
 float-to-integer conversions. Scalar forms simde_vcvtmh_*/simde_vcvtms_*/simde_vcvtmd_* and
 vector forms simde_vcvtm_*/simde_vcvtmq_* produce s16/s32/s64 and u16/u32/u64 results.
 Unless SIMDE_FAST_CONVERSION_RANGE is defined, the fallbacks saturate out-of-range inputs to
 the destination type's limits (0 for unsigned), map NaN to 0, and otherwise return floor(a).]
Crusoe - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_CVTN_H) -#define SIMDE_ARM_NEON_CVTN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vcvtnq_s32_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vcvtnq_s32_f32(a); - #else - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_int32x4_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { - unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); - _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); - r_.m128i = _mm_cvtps_epi32(a_.m128); - _MM_SET_ROUNDING_MODE(rounding_mode); - } else { - r_.m128i = _mm_cvtps_epi32(a_.m128); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(a_.values[i])); - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtnq_s32_f32 - #define vcvtnq_s32_f32(a) simde_vcvtnq_s32_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vcvtnq_s64_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtnq_s64_f64(a); - #else - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_int64x2_private r_; - - #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { - unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); - _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); - r_.m128i = _mm_cvtpd_epi64(a_.m128d); - _MM_SET_ROUNDING_MODE(rounding_mode); - } else { - r_.m128i = _mm_cvtpd_epi64(a_.m128d); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int64_t, simde_math_roundeven(a_.values[i])); - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtnq_s64_f64 - #define vcvtnq_s64_f64(a) simde_vcvtnq_s64_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vcvtnh_s64_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtnh_s64_f16(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(int64_t, simde_math_roundevenf(simde_float16_to_float32(a))); - #else - simde_float32 a_ = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, INT64_MIN))) { - return INT64_MIN; - } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, INT64_MAX))) { - return INT64_MAX; - } else if (simde_math_isnanf(a_)) { - return 0; - } else { - return HEDLEY_STATIC_CAST(int64_t, simde_math_roundevenf(a_)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtnh_s64_f16 - #define vcvtnh_s64_f16(a) simde_vcvtnh_s64_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-int32_t -simde_vcvtnh_s32_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtnh_s32_f16(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(simde_float16_to_float32(a))); - #else - simde_float32 a_ = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { - return INT32_MIN; - } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { - return INT32_MAX; - } else if (simde_math_isnanf(a_)) { - return 0; - } else { - return HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(a_)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtnh_s32_f16 - #define vcvtnh_s32_f16(a) simde_vcvtnh_s32_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vcvtnh_s16_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtnh_s16_f16(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(int16_t, simde_math_roundevenf(simde_float16_to_float32(a))); - #else - simde_float32 a_ = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, INT16_MIN))) { - return INT16_MIN; - } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, INT16_MAX))) { - return INT16_MAX; - } else if (simde_math_isnanf(a_)) { - return 0; - } else { - return HEDLEY_STATIC_CAST(int16_t, simde_math_roundevenf(a_)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtnh_s16_f16 - #define vcvtnh_s16_f16(a) simde_vcvtnh_s16_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcvtnh_u64_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtnh_u64_f16(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundevenf(simde_float16_to_float32(a))); - #else - simde_float32 a_ = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, 0))) { - return 0; - } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, UINT64_MAX))) { - return UINT64_MAX; - } else if (simde_math_isnanf(a_)) { - return 0; - } else { - return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundevenf(a_)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtnh_u64_f16 - #define vcvtnh_u64_f16(a) simde_vcvtnh_u64_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vcvtnh_u32_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtnh_u32_f16(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(simde_float16_to_float32(a))); - #else - simde_float32 a_ = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, 0))) { - return 0; - } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { - return UINT32_MAX; - } else if (simde_math_isnanf(a_)) { - return 0; - } else { - return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(a_)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtnh_u32_f16 - #define vcvtnh_u32_f16(a) simde_vcvtnh_u32_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vcvtnh_u16_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return 
vcvtnh_u16_f16(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint16_t, simde_math_roundevenf(simde_float16_to_float32(a))); - #else - simde_float32 a_ = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, 0))) { - return 0; - } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX))) { - return UINT16_MAX; - } else if (simde_math_isnanf(a_)) { - return 0; - } else { - return HEDLEY_STATIC_CAST(uint16_t, simde_math_roundevenf(a_)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtnh_u16_f16 - #define vcvtnh_u16_f16(a) simde_vcvtnh_u16_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vcvtns_s32_f32(simde_float32 a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtns_s32_f32(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(a)); - #else - if (HEDLEY_UNLIKELY(a < HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { - return INT32_MIN; - } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { - return INT32_MAX; - } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { - return 0; - } else { - return HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(a)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtns_s32_f32 - #define vcvtns_s32_f32(a) simde_vcvtns_s32_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vcvtns_u32_f32(simde_float32 a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtns_u32_f32(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(a)); - #else - if (HEDLEY_UNLIKELY(a < SIMDE_FLOAT32_C(0.0))) { - return 0; - } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { - return UINT32_MAX; - } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { - return 0; - } else { - return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(a)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtns_u32_f32 - #define vcvtns_u32_f32(a) simde_vcvtns_u32_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcvtnq_u32_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) - return vcvtnq_u32_f32(a); - #else - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_uint32x4_private r_; - - #if 0 && defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - // Hmm.. 
this doesn't work, unlike the signed versions - if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { - unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); - _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); - r_.m128i = _mm_cvtps_epu32(a_.m128); - _MM_SET_ROUNDING_MODE(rounding_mode); - } else { - r_.m128i = _mm_cvtps_epu32(a_.m128); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtns_u32_f32(a_.values[i]); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcvtnq_u32_f32 - #define vcvtnq_u32_f32(a) simde_vcvtnq_u32_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vcvtnd_s64_f64(simde_float64 a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtnd_s64_f64(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(int64_t, simde_math_roundeven(a)); - #else - if (HEDLEY_UNLIKELY(a < HEDLEY_STATIC_CAST(simde_float64, INT64_MIN))) { - return INT64_MIN; - } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float64, INT64_MAX))) { - return INT64_MAX; - } else if (simde_math_isnan(a)) { - return 0; - } else { - return HEDLEY_STATIC_CAST(int64_t, simde_math_roundeven(a)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtnd_s64_f64 - #define vcvtnd_s64_f64(a) simde_vcvtnd_s64_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcvtnd_u64_f64(simde_float64 a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtnd_u64_f64(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundeven(a)); - #else - if (HEDLEY_UNLIKELY(a < SIMDE_FLOAT64_C(0.0))) { - return 0; - } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX))) { - return UINT64_MAX; - } else if (simde_math_isnan(a)) { - return 0; - } else { - return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundeven(a)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtnd_u64_f64 - #define vcvtnd_u64_f64(a) simde_vcvtnd_u64_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcvtnq_u64_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtnq_u64_f64(a); - #else - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_uint64x2_private r_; - - #if 0 && defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - // Hmm.. 
this doesn't work, unlike the signed versions - if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { - unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); - _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); - r_.m128i = _mm_cvtpd_epu64(a_.m128d); - _MM_SET_ROUNDING_MODE(rounding_mode); - } else { - r_.m128i = _mm_cvtpd_epu64(a_.m128d); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtnd_u64_f64(a_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtnq_u64_f64 - #define vcvtnq_u64_f64(a) simde_vcvtnq_u64_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vcvtnq_s16_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtnq_s16_f16(a); - #else - simde_float16x8_private a_ = simde_float16x8_to_private(a); - simde_int16x8_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtnh_s16_f16(a_.values[i]); - } - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtnq_s16_f16 - #define vcvtnq_s16_f16(a) simde_vcvtnq_s16_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vcvtn_s16_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtn_s16_f16(a); - #else - simde_float16x4_private a_ = simde_float16x4_to_private(a); - simde_int16x4_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtnh_s16_f16(a_.values[i]); - } - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtn_s16_f16 - #define vcvtn_s16_f16(a) simde_vcvtn_s16_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vcvtnq_u16_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtnq_u16_f16(a); - #else - simde_float16x8_private a_ = simde_float16x8_to_private(a); - simde_uint16x8_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtnh_u16_f16(a_.values[i]); - } - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtnq_u16_f16 - #define vcvtnq_u16_f16(a) simde_vcvtnq_u16_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vcvtn_u16_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtn_u16_f16(a); - #else - simde_float16x4_private a_ = simde_float16x4_to_private(a); - simde_uint16x4_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtnh_u16_f16(a_.values[i]); - } - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtn_u16_f16 - #define vcvtn_u16_f16(a) simde_vcvtn_u16_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vcvtn_u32_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vcvtn_u32_f32(a); - #else - simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_uint32x2_private r_; - - 
SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtns_u32_f32(a_.values[i]); - } - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtn_u32_f32 - #define vcvtn_u32_f32(a) simde_vcvtn_u32_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vcvtn_s32_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vcvtn_s32_f32(a); - #else - simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_int32x2_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtns_s32_f32(a_.values[i]); - } - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtn_s32_f32 - #define vcvtn_s32_f32(a) simde_vcvtn_s32_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vcvtn_s64_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtn_s64_f64(a); - #else - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_int64x1_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtnd_s64_f64(a_.values[i]); - } - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtn_s64_f64 - #define vcvtn_s64_f64(a) simde_vcvtn_s64_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vcvtn_u64_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtn_u64_f64(a); - #else - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_uint64x1_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtnd_u64_f64(a_.values[i]); - } - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtn_u64_f64 - #define vcvtn_u64_f64(a) simde_vcvtn_u64_f64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_NEON_CVTN_H */ -/* :: End simde/arm/neon/cvtn.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/cvtp.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_CVTP_H) -#define SIMDE_ARM_NEON_CVTP_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vcvtph_s64_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtph_s64_f16(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(int64_t, - simde_math_ceilf( - simde_float16_to_float32(a))); - #else - simde_float32 af = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT64_MIN))) { - return INT64_MIN; - } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT64_MAX))) { - return INT64_MAX; - } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { - return 0; - } else { - return HEDLEY_STATIC_CAST(int64_t, simde_math_ceilf(af)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtph_s64_f16 - #define vcvtph_s64_f16(a) simde_vcvtph_s64_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vcvtph_s32_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtph_s32_f16(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(int32_t, - simde_math_ceilf( - simde_float16_to_float32(a))); - #else - simde_float32 af = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { - return INT32_MIN; - } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { - return INT32_MAX; - } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { - return 0; - } else { - return HEDLEY_STATIC_CAST(int32_t, simde_math_ceilf(af)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtph_s32_f16 - #define vcvtph_s32_f16(a) simde_vcvtph_s32_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vcvtph_s16_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtph_s16_f16(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(int16_t, - simde_math_ceilf( - simde_float16_to_float32(a))); - #else - simde_float32 af = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT16_MIN))) { - return INT16_MIN; - } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT16_MAX))) { - return INT16_MAX; - } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { - return 0; - } else { - return HEDLEY_STATIC_CAST(int16_t, simde_math_ceilf(af)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtph_s16_f16 - #define vcvtph_s16_f16(a) simde_vcvtph_s16_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcvtph_u64_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtph_u64_f16(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return 
HEDLEY_STATIC_CAST(uint64_t, - simde_math_ceilf( - simde_float16_to_float32(a))); - #else - simde_float32 af = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { - return 0; - } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT64_MAX))) { - return UINT64_MAX; - } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { - return 0; - } else { - return HEDLEY_STATIC_CAST(uint64_t, simde_math_ceilf(af)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtph_u64_f16 - #define vcvtph_u64_f16(a) simde_vcvtph_u64_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vcvtph_u32_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtph_u32_f16(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint32_t, - simde_math_ceilf( - simde_float16_to_float32(a))); - #else - simde_float32 af = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { - return 0; - } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { - return UINT32_MAX; - } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { - return 0; - } else { - return HEDLEY_STATIC_CAST(uint32_t, simde_math_ceilf(af)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtph_u32_f16 - #define vcvtph_u32_f16(a) simde_vcvtph_u32_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vcvtph_u16_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtph_u16_f16(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint16_t, - simde_math_ceilf( - simde_float16_to_float32(a))); - #else - simde_float32 af = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { - return 0; - } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX))) { - return UINT16_MAX; - } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { - return 0; - } else { - return HEDLEY_STATIC_CAST(uint16_t, simde_math_ceilf(af)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtph_u16_f16 - #define vcvtph_u16_f16(a) simde_vcvtph_u16_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vcvtps_s32_f32(simde_float32 a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtps_s32_f32(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(int32_t, simde_math_ceilf(a)); - #else - if (HEDLEY_UNLIKELY(a <= HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { - return INT32_MIN; - } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { - return INT32_MAX; - } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { - return 0; - } else { - return HEDLEY_STATIC_CAST(int32_t, simde_math_ceilf(a)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtps_s32_f32 - #define vcvtps_s32_f32(a) simde_vcvtps_s32_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vcvtps_u32_f32(simde_float32 a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtps_u32_f32(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint32_t, simde_math_ceilf(a)); - #else - if (HEDLEY_UNLIKELY(a <= SIMDE_FLOAT32_C(0.0))) { - return 0; - } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { - return UINT32_MAX; - } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { - return 0; - } else { - 
return HEDLEY_STATIC_CAST(uint32_t, simde_math_ceilf(a)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtps_u32_f32 - #define vcvtps_u32_f32(a) simde_vcvtps_u32_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vcvtpd_s64_f64(simde_float64 a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtpd_s64_f64(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(int64_t, simde_math_ceil(a)); - #else - if (HEDLEY_UNLIKELY(a <= HEDLEY_STATIC_CAST(simde_float64, INT64_MIN))) { - return INT64_MIN; - } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float64, INT64_MAX))) { - return INT64_MAX; - } else if (simde_math_isnan(a)) { - return 0; - } else { - return HEDLEY_STATIC_CAST(int64_t, simde_math_ceil(a)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtpd_s64_f64 - #define vcvtpd_s64_f64(a) simde_vcvtpd_s64_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vcvtpd_u64_f64(simde_float64 a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtpd_u64_f64(a); - #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint64_t, simde_math_ceil(a)); - #else - if (HEDLEY_UNLIKELY(a <= SIMDE_FLOAT64_C(0.0))) { - return 0; - } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX))) { - return UINT64_MAX; - } else if (simde_math_isnan(a)) { - return 0; - } else { - return HEDLEY_STATIC_CAST(uint64_t, simde_math_ceil(a)); - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtpd_u64_f64 - #define vcvtpd_u64_f64(a) simde_vcvtpd_u64_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vcvtpq_s16_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtpq_s16_f16(a); - #else - simde_float16x8_private a_ = simde_float16x8_to_private(a); - simde_int16x8_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtph_s16_f16(a_.values[i]); - } - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtpq_s16_f16 - #define vcvtpq_s16_f16(a) simde_vcvtpq_s16_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vcvtpq_s32_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vcvtpq_s32_f32(a); - #else - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_int32x4_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtps_s32_f32(a_.values[i]); - } - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtpq_s32_f32 - #define vcvtpq_s32_f32(a) simde_vcvtpq_s32_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vcvtpq_s64_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtpq_s64_f64(a); - #else - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_int64x2_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtpd_s64_f64(a_.values[i]); - } - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtpq_s64_f64 - #define vcvtpq_s64_f64(a) simde_vcvtpq_s64_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde_uint16x8_t -simde_vcvtpq_u16_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtpq_u16_f16(a); - #else - simde_float16x8_private a_ = simde_float16x8_to_private(a); - simde_uint16x8_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtph_u16_f16(a_.values[i]); - } - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtpq_u16_f16 - #define vcvtpq_u16_f16(a) simde_vcvtpq_u16_f16(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vcvtpq_u32_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) - return vcvtpq_u32_f32(a); - #else - simde_float32x4_private a_ = simde_float32x4_to_private(a); - simde_uint32x4_private r_; - - #if 0 && defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - // Hmm.. this doesn't work, unlike the signed versions - if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { - unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); - _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); - r_.m128i = _mm_cvtps_epu32(a_.m128); - _MM_SET_ROUNDING_MODE(rounding_mode); - } else { - r_.m128i = _mm_cvtps_epu32(a_.m128); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtps_u32_f32(a_.values[i]); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vcvtpq_u32_f32 - #define vcvtpq_u32_f32(a) simde_vcvtpq_u32_f32(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vcvtpq_u64_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtpq_u64_f64(a); - #else - simde_float64x2_private a_ = simde_float64x2_to_private(a); - simde_uint64x2_private r_; - - #if 0 && defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - // Hmm.. 
this doesn't work, unlike the signed versions - if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { - unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); - _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); - r_.m128i = _mm_cvtpd_epu64(a_.m128d); - _MM_SET_ROUNDING_MODE(rounding_mode); - } else { - r_.m128i = _mm_cvtpd_epu64(a_.m128d); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtpd_u64_f64(a_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtpq_u64_f64 - #define vcvtpq_u64_f64(a) simde_vcvtpq_u64_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vcvtp_s16_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtp_s16_f16(a); - #else - simde_float16x4_private a_ = simde_float16x4_to_private(a); - simde_int16x4_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtph_s16_f16(a_.values[i]); - } - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtp_s16_f16 - #define vcvtp_s16_f16(a) simde_vcvtp_s16_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vcvtp_u16_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvtp_u16_f16(a); - #else - simde_float16x4_private a_ = simde_float16x4_to_private(a); - simde_uint16x4_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtph_u16_f16(a_.values[i]); - } - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtp_u16_f16 - #define vcvtp_u16_f16(a) simde_vcvtp_u16_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vcvtp_u32_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vcvtp_u32_f32(a); - #else - simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_uint32x2_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtps_u32_f32(a_.values[i]); - } - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtp_u32_f32 - #define vcvtp_u32_f32(a) simde_vcvtp_u32_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vcvtp_s32_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vcvtp_s32_f32(a); - #else - simde_float32x2_private a_ = simde_float32x2_to_private(a); - simde_int32x2_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtps_s32_f32(a_.values[i]); - } - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcvtp_s32_f32 - #define vcvtp_s32_f32(a) simde_vcvtp_s32_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vcvtp_s64_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtp_s64_f64(a); - #else - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_int64x1_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtpd_s64_f64(a_.values[i]); - } - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtp_s64_f64 - #define vcvtp_s64_f64(a) simde_vcvtp_s64_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vcvtp_u64_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vcvtp_u64_f64(a); - #else - simde_float64x1_private a_ = simde_float64x1_to_private(a); - simde_uint64x1_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvtpd_u64_f64(a_.values[i]); - } - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvtp_u64_f64 - #define vcvtp_u64_f64(a) simde_vcvtp_u64_f64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_NEON_CVTP_H */ -/* :: End simde/arm/neon/cvtp.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/copy_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_COPY_LANE_H) -#define SIMDE_ARM_NEON_COPY_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vcopy_lane_s8(simde_int8x8_t a, const int lane1, simde_int8x8_t b, const int lane2) - SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 7) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 7) { - simde_int8x8_private - b_ = simde_int8x8_to_private(b), - r_ = simde_int8x8_to_private(a); - - r_.values[lane1] = b_.values[lane2]; - return simde_int8x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcopy_lane_s8(a, lane1, b, lane2) vcopy_lane_s8((a), (lane1), (b), (lane2)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcopy_lane_s8 - #define vcopy_lane_s8(a, lane1, b, lane2) simde_vcopy_lane_s8((a), (lane1), (b), (lane2)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vcopy_lane_s16(simde_int16x4_t a, const int lane1, simde_int16x4_t b, const int lane2) - SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 3) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 3) { - simde_int16x4_private - b_ = simde_int16x4_to_private(b), - r_ = simde_int16x4_to_private(a); - - r_.values[lane1] = b_.values[lane2]; - return simde_int16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcopy_lane_s16(a, lane1, b, lane2) vcopy_lane_s16((a), (lane1), (b), (lane2)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcopy_lane_s16 - #define vcopy_lane_s16(a, lane1, b, lane2) simde_vcopy_lane_s16((a), (lane1), (b), (lane2)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vcopy_lane_s32(simde_int32x2_t a, const int lane1, simde_int32x2_t b, const int lane2) - SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { - simde_int32x2_private - b_ = simde_int32x2_to_private(b), - r_ = simde_int32x2_to_private(a); - - r_.values[lane1] = b_.values[lane2]; - return simde_int32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcopy_lane_s32(a, lane1, b, lane2) vcopy_lane_s32((a), (lane1), (b), (lane2)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcopy_lane_s32 - #define vcopy_lane_s32(a, lane1, b, lane2) simde_vcopy_lane_s32((a), (lane1), (b), (lane2)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vcopy_lane_s64(simde_int64x1_t a, const int lane1, simde_int64x1_t b, const int lane2) - SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 0) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 0) { - simde_int64x1_private - b_ = simde_int64x1_to_private(b), - r_ = simde_int64x1_to_private(a); - - r_.values[lane1] = b_.values[lane2]; - return simde_int64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcopy_lane_s64(a, lane1, b, lane2) vcopy_lane_s64((a), (lane1), (b), (lane2)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcopy_lane_s64 - #define vcopy_lane_s64(a, lane1, b, lane2) simde_vcopy_lane_s64((a), (lane1), (b), (lane2)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vcopy_lane_u8(simde_uint8x8_t a, const int lane1, simde_uint8x8_t b, const int lane2) - 
SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 7) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 7) { - simde_uint8x8_private - b_ = simde_uint8x8_to_private(b), - r_ = simde_uint8x8_to_private(a); - - r_.values[lane1] = b_.values[lane2]; - return simde_uint8x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcopy_lane_u8(a, lane1, b, lane2) vcopy_lane_u8((a), (lane1), (b), (lane2)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcopy_lane_u8 - #define vcopy_lane_u8(a, lane1, b, lane2) simde_vcopy_lane_u8((a), (lane1), (b), (lane2)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vcopy_lane_u16(simde_uint16x4_t a, const int lane1, simde_uint16x4_t b, const int lane2) - SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 3) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 3) { - simde_uint16x4_private - b_ = simde_uint16x4_to_private(b), - r_ = simde_uint16x4_to_private(a); - - r_.values[lane1] = b_.values[lane2]; - return simde_uint16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcopy_lane_u16(a, lane1, b, lane2) vcopy_lane_u16((a), (lane1), (b), (lane2)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcopy_lane_u16 - #define vcopy_lane_u16(a, lane1, b, lane2) simde_vcopy_lane_u16((a), (lane1), (b), (lane2)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vcopy_lane_u32(simde_uint32x2_t a, const int lane1, simde_uint32x2_t b, const int lane2) - SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { - simde_uint32x2_private - b_ = simde_uint32x2_to_private(b), - r_ = simde_uint32x2_to_private(a); - - r_.values[lane1] = b_.values[lane2]; - return simde_uint32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcopy_lane_u32(a, lane1, b, lane2) vcopy_lane_u32((a), (lane1), (b), (lane2)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcopy_lane_u32 - #define vcopy_lane_u32(a, lane1, b, lane2) simde_vcopy_lane_u32((a), (lane1), (b), (lane2)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vcopy_lane_u64(simde_uint64x1_t a, const int lane1, simde_uint64x1_t b, const int lane2) - SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 0) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 0) { - simde_uint64x1_private - b_ = simde_uint64x1_to_private(b), - r_ = simde_uint64x1_to_private(a); - - r_.values[lane1] = b_.values[lane2]; - return simde_uint64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcopy_lane_u64(a, lane1, b, lane2) vcopy_lane_u64((a), (lane1), (b), (lane2)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcopy_lane_u64 - #define vcopy_lane_u64(a, lane1, b, lane2) simde_vcopy_lane_u64((a), (lane1), (b), (lane2)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcopy_lane_f32(simde_float32x2_t a, const int lane1, simde_float32x2_t b, const int lane2) - SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { - simde_float32x2_private - b_ = simde_float32x2_to_private(b), - r_ = simde_float32x2_to_private(a); - - r_.values[lane1] = b_.values[lane2]; - return simde_float32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcopy_lane_f32(a, lane1, b, lane2) vcopy_lane_f32((a), (lane1), (b), (lane2)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcopy_lane_f32 - #define vcopy_lane_f32(a, lane1, b, lane2) simde_vcopy_lane_f32((a), 
(lane1), (b), (lane2)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vcopy_lane_f64(simde_float64x1_t a, const int lane1, simde_float64x1_t b, const int lane2) - SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 0) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 0) { - simde_float64x1_private - b_ = simde_float64x1_to_private(b), - r_ = simde_float64x1_to_private(a); - - r_.values[lane1] = b_.values[lane2]; - return simde_float64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcopy_lane_f64(a, lane1, b, lane2) vcopy_lane_f64((a), (lane1), (b), (lane2)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcopy_lane_f64 - #define vcopy_lane_f64(a, lane1, b, lane2) simde_vcopy_lane_f64((a), (lane1), (b), (lane2)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vcopy_laneq_s8(simde_int8x8_t a, const int lane1, simde_int8x16_t b, const int lane2) - SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 7) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 15) { - simde_int8x8_private - r_ = simde_int8x8_to_private(a); - simde_int8x16_private - b_ = simde_int8x16_to_private(b); - - r_.values[lane1] = b_.values[lane2]; - return simde_int8x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcopy_laneq_s8(a, lane1, b, lane2) vcopy_laneq_s8((a), (lane1), (b), (lane2)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcopy_laneq_s8 - #define vcopy_laneq_s8(a, lane1, b, lane2) simde_vcopy_laneq_s8((a), (lane1), (b), (lane2)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vcopy_laneq_s16(simde_int16x4_t a, const int lane1, simde_int16x8_t b, const int lane2) - SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 3) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 7) { - simde_int16x4_private - r_ = simde_int16x4_to_private(a); - simde_int16x8_private - b_ = simde_int16x8_to_private(b); - - r_.values[lane1] = b_.values[lane2]; - return simde_int16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcopy_laneq_s16(a, lane1, b, lane2) vcopy_laneq_s16((a), (lane1), (b), (lane2)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcopy_laneq_s16 - #define vcopy_laneq_s16(a, lane1, b, lane2) simde_vcopy_laneq_s16((a), (lane1), (b), (lane2)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vcopy_laneq_s32(simde_int32x2_t a, const int lane1, simde_int32x4_t b, const int lane2) - SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 3) { - simde_int32x2_private - r_ = simde_int32x2_to_private(a); - simde_int32x4_private - b_ = simde_int32x4_to_private(b); - - r_.values[lane1] = b_.values[lane2]; - return simde_int32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcopy_laneq_s32(a, lane1, b, lane2) vcopy_laneq_s32((a), (lane1), (b), (lane2)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcopy_laneq_s32 - #define vcopy_laneq_s32(a, lane1, b, lane2) simde_vcopy_laneq_s32((a), (lane1), (b), (lane2)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vcopy_laneq_s64(simde_int64x1_t a, const int lane1, simde_int64x2_t b, const int lane2) - SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 0) - SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { - simde_int64x1_private - r_ = simde_int64x1_to_private(a); - simde_int64x2_private - b_ = simde_int64x2_to_private(b); - - r_.values[lane1] = b_.values[lane2]; - return simde_int64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) 
[... vendored SIMDE headers deleted wholesale along with the dependency: the remainder of simde/arm/neon/copy_lane.h (lane-copy intrinsics for the integer, unsigned, float, poly, and bfloat16 vector types), then crc32.h, create.h, div.h, and the start of dot.h and its helper headers padd.h, paddl.h, and uzp1.h — all auto-generated "DO NOT MODIFY" files, each prefixed with a repeated MIT license block ...]
#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1q_f64 - #define vuzp1q_f64(a, b) simde_vuzp1q_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vuzp1q_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp1q_s8(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16x2_t t = vuzpq_s8(a, b); - return t.val[0]; - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_shuffle(a_.v128, b_.v128, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1q_s8 - #define vuzp1q_s8(a, b) simde_vuzp1q_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vuzp1q_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp1q_s16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a, b); - return t.val[0]; - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_shuffle(a_.v128, b_.v128, 0, 2, 4, 6, 8, 10, 12, 14); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1q_s16 - #define vuzp1q_s16(a, b) simde_vuzp1q_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vuzp1q_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp1q_s32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a, b); - return t.val[0]; - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 0, 2, 4, 6); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a_.m128i), _mm_castsi128_ps(b_.m128i), 0x88)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = 
SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 2, 4, 6); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1q_s32 - #define vuzp1q_s32(a, b) simde_vuzp1q_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vuzp1q_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp1q_s64(a, b); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 0, 2); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(a_.m128i), _mm_castsi128_ps(b_.m128i))); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1q_s64 - #define vuzp1q_s64(a, b) simde_vuzp1q_s64((a), (b)) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vuzp1q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp1q_u8(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16x2_t t = vuzpq_u8(a, b); - return t.val[0]; - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_shuffle(a_.v128, b_.v128, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1q_u8 - #define vuzp1q_u8(a, b) simde_vuzp1q_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vuzp1q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp1q_u16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8x2_t t = vuzpq_u16(a, b); - return t.val[0]; - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_shuffle(a_.v128, b_.v128, 0, 2, 4, 6, 8, 10, 12, 14); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - 
SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1q_u16 - #define vuzp1q_u16(a, b) simde_vuzp1q_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vuzp1q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp1q_u32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4x2_t t = vuzpq_u32(a, b); - return t.val[0]; - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 0, 2, 4, 6); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a_.m128i), _mm_castsi128_ps(b_.m128i), 0x88)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 2, 4, 6); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1q_u32 - #define vuzp1q_u32(a, b) simde_vuzp1q_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vuzp1q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp1q_u64(a, b); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 0, 2); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* _mm_movelh_ps?!?! SSE is weird. 
*/ - r_.m128i = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(a_.m128i), _mm_castsi128_ps(b_.m128i))); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1q_u64 - #define vuzp1q_u64(a, b) simde_vuzp1q_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vuzp1_p8(simde_poly8x8_t a, simde_poly8x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp1_p8(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_poly8x8x2_t t = vuzp_p8(a, b); - return t.val[0]; - #else - simde_poly8x8_private - r_, - a_ = simde_poly8x8_to_private(a), - b_ = simde_poly8x8_to_private(b); - - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1_p8 - #define vuzp1_p8(a, b) simde_vuzp1_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vuzp1_p16(simde_poly16x4_t a, simde_poly16x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp1_p16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_poly16x4x2_t t = vuzp_p16(a, b); - return t.val[0]; - #else - simde_poly16x4_private - r_, - a_ = simde_poly16x4_to_private(a), - b_ = simde_poly16x4_to_private(b); - - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1_p16 - #define vuzp1_p16(a, b) simde_vuzp1_p16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vuzp1q_p8(simde_poly8x16_t a, simde_poly8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp1q_p8(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_poly8x16x2_t t = vuzpq_p8(a, b); - return t.val[0]; - #else - simde_poly8x16_private - r_, - a_ = simde_poly8x16_to_private(a), - b_ = simde_poly8x16_to_private(b); - - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1q_p8 - #define vuzp1q_p8(a, b) simde_vuzp1q_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vuzp1q_p16(simde_poly16x8_t a, simde_poly16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp1q_p16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_poly16x8x2_t t = vuzpq_p16(a, b); - return t.val[0]; - #else - simde_poly16x8_private - r_, - a_ = 
simde_poly16x8_to_private(a), - b_ = simde_poly16x8_to_private(b); - - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1q_p16 - #define vuzp1q_p16(a, b) simde_vuzp1q_p16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vuzp1q_p64(simde_poly64x2_t a, simde_poly64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp1q_p64(a, b); - #else - simde_poly64x2_private - r_, - a_ = simde_poly64x2_to_private(a), - b_ = simde_poly64x2_to_private(b); - - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx]; - r_.values[i + halfway_point] = b_.values[idx]; - } - - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp1q_p64 - #define vuzp1q_p64(a, b) simde_vuzp1q_p64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_UZP1_H) */ -/* :: End simde/arm/neon/uzp1.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/uzp2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_UZP2_H) -#define SIMDE_ARM_NEON_UZP2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vuzp2_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vuzp2_f16(a, b); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2_f16 - #define vuzp2_f16(a, b) simde_vuzp2_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vuzp2_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2_f32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2x2_t t = vuzp_f32(a, b); - return t.val[1]; - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2_f32 - #define vuzp2_f32(a, b) simde_vuzp2_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vuzp2_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2_s8(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8x2_t t = vuzp_s8(a, b); - return t.val[1]; - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2_s8 - #define vuzp2_s8(a, b) simde_vuzp2_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vuzp2_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2_s16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a, b); - return t.val[1]; - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - #if 
defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 1, 3, 5, 7); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2_s16 - #define vuzp2_s16(a, b) simde_vuzp2_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vuzp2_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2_s32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2x2_t t = vuzp_s32(a, b); - return t.val[1]; - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2_s32 - #define vuzp2_s32(a, b) simde_vuzp2_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vuzp2_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2_u8(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8x2_t t = vuzp_u8(a, b); - return t.val[1]; - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2_u8 - #define vuzp2_u8(a, b) simde_vuzp2_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vuzp2_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2_u16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4x2_t t = vuzp_u16(a, b); - return t.val[1]; - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 1, 3, 5, 7); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2_u16 - #define vuzp2_u16(a, b) simde_vuzp2_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde_uint32x2_t -simde_vuzp2_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2_u32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2x2_t t = vuzp_u32(a, b); - return t.val[1]; - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2_u32 - #define vuzp2_u32(a, b) simde_vuzp2_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vuzp2q_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vuzp2q_f16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - float16x8x2_t t = vuzpq_f16(a, b); - return t.val[1]; - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_f16 - #define vuzp2q_f16(a, b) simde_vuzp2q_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vuzp2q_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2q_f32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a, b); - return t.val[1]; - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 1, 3, 5, 7); - #elif defined(SIMDE_X86_SSE_NATIVE) - r_.m128 = _mm_shuffle_ps(a_.m128, b_.m128, 0xdd); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 1, 3, 5, 7); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_f32 - #define vuzp2q_f32(a, b) simde_vuzp2q_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vuzp2q_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2q_f64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_mergel(a, b); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 1, 3); - #elif 
defined(SIMDE_X86_SSE2_NATIVE) - r_.m128d = _mm_unpackhi_pd(a_.m128d, b_.m128d); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_f64 - #define vuzp2q_f64(a, b) simde_vuzp2q_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vuzp2q_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2q_s8(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16x2_t t = vuzpq_s8(a, b); - return t.val[1]; - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_shuffle(a_.v128, b_.v128, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_s8 - #define vuzp2q_s8(a, b) simde_vuzp2q_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vuzp2q_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2q_s16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a, b); - return t.val[1]; - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_shuffle(a_.v128, b_.v128, 1, 3, 5, 7, 9, 11, 13, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_s16 - #define vuzp2q_s16(a, b) simde_vuzp2q_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vuzp2q_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2q_s32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a, b); - return t.val[1]; - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 1, 3, 5, 7); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = 
_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a_.m128i), _mm_castsi128_ps(b_.m128i), 0xdd)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 1, 3, 5, 7); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_s32 - #define vuzp2q_s32(a, b) simde_vuzp2q_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vuzp2q_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2q_s64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_mergel(a, b); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 1, 3); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_unpackhi_epi64(a_.m128i, b_.m128i); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_s64 - #define vuzp2q_s64(a, b) simde_vuzp2q_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vuzp2q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2q_u8(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16x2_t t = vuzpq_u8(a, b); - return t.val[1]; - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_shuffle(a_.v128, b_.v128, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_u8 - #define vuzp2q_u8(a, b) simde_vuzp2q_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vuzp2q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2q_u16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8x2_t t = vuzpq_u16(a, b); - return t.val[1]; - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_shuffle(a_.v128, b_.v128, 1, 3, 5, 7, 9, 11, 13, 15); - #elif 
defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_u16 - #define vuzp2q_u16(a, b) simde_vuzp2q_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vuzp2q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2q_u32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4x2_t t = vuzpq_u32(a, b); - return t.val[1]; - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 1, 3, 5, 7); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a_.m128i), _mm_castsi128_ps(b_.m128i), 0xdd)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 1, 3, 5, 7); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_u32 - #define vuzp2q_u32(a, b) simde_vuzp2q_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vuzp2q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2q_u64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_mergel(a, b); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 1, 3); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_unpackhi_epi64(a_.m128i, b_.m128i); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_u64 - #define vuzp2q_u64(a, b) simde_vuzp2q_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vuzp2_p8(simde_poly8x8_t a, simde_poly8x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2_p8(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - poly8x8x2_t t = vuzp_p8(a, b); - return t.val[1]; - #else - simde_poly8x8_private - r_, - a_ = simde_poly8x8_to_private(a), - b_ = simde_poly8x8_to_private(b); - - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - 
r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2_p8 - #define vuzp2_p8(a, b) simde_vuzp2_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vuzp2_p16(simde_poly16x4_t a, simde_poly16x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2_p16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - poly16x4x2_t t = vuzp_p16(a, b); - return t.val[1]; - #else - simde_poly16x4_private - r_, - a_ = simde_poly16x4_to_private(a), - b_ = simde_poly16x4_to_private(b); - - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2_p16 - #define vuzp2_p16(a, b) simde_vuzp2_p16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vuzp2q_p8(simde_poly8x16_t a, simde_poly8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2q_p8(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - poly8x16x2_t t = vuzpq_p8(a, b); - return t.val[1]; - #else - simde_poly8x16_private - r_, - a_ = simde_poly8x16_to_private(a), - b_ = simde_poly8x16_to_private(b); - - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_p8 - #define vuzp2q_p8(a, b) simde_vuzp2q_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vuzp2q_p16(simde_poly16x8_t a, simde_poly16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2q_p16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - poly16x8x2_t t = vuzpq_p16(a, b); - return t.val[1]; - #else - simde_poly16x8_private - r_, - a_ = simde_poly16x8_to_private(a), - b_ = simde_poly16x8_to_private(b); - - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_p16 - #define vuzp2q_p16(a, b) simde_vuzp2q_p16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vuzp2q_p64(simde_poly64x2_t a, simde_poly64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuzp2q_p64(a, b); - #else - simde_poly64x2_private - r_, - a_ = simde_poly64x2_to_private(a), - b_ = simde_poly64x2_to_private(b); - - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - const size_t idx = i << 1; - r_.values[ i ] = a_.values[idx | 1]; - r_.values[i + halfway_point] = b_.values[idx | 1]; - } - - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuzp2q_p64 - #define 
vuzp2q_p64(a, b) simde_vuzp2q_p64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_UZP2_H) */ -/* :: End simde/arm/neon/uzp2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/get_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_GET_LANE_H) -#define SIMDE_ARM_NEON_GET_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vget_lane_f16(simde_float16x4_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16_t r; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - SIMDE_CONSTIFY_4_(vget_lane_f16, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT16_VALUE(0.0)), lane, v); - #else - simde_float16x4_private v_ = simde_float16x4_to_private(v); - - r = v_.values[lane]; - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vget_lane_f16 - #define vget_lane_f16(v, lane) simde_vget_lane_f16((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vget_lane_f32(simde_float32x2_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_2_(vget_lane_f32, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v); - #else - simde_float32x2_private v_ = simde_float32x2_to_private(v); - - r = v_.values[lane]; - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_lane_f32 - #define vget_lane_f32(v, lane) simde_vget_lane_f32((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vget_lane_f64(simde_float64x1_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float64_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - (void) lane; - return vget_lane_f64(v, 0); - #else - simde_float64x1_private v_ = simde_float64x1_to_private(v); - - r = v_.values[lane]; - #endif - - 
return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vget_lane_f64 - #define vget_lane_f64(v, lane) simde_vget_lane_f64((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_vget_lane_s8(simde_int8x8_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - int8_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_(vget_lane_s8, r, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v); - #else - simde_int8x8_private v_ = simde_int8x8_to_private(v); - - r = v_.values[lane]; - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_lane_s8 - #define vget_lane_s8(v, lane) simde_vget_lane_s8((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vget_lane_s16(simde_int16x4_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - int16_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_(vget_lane_s16, r, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v); - #else - simde_int16x4_private v_ = simde_int16x4_to_private(v); - - r = v_.values[lane]; - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_lane_s16 - #define vget_lane_s16(v, lane) simde_vget_lane_s16((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vget_lane_s32(simde_int32x2_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - int32_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_2_(vget_lane_s32, r, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v); - #else - simde_int32x2_private v_ = simde_int32x2_to_private(v); - - r = v_.values[lane]; - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_lane_s32 - #define vget_lane_s32(v, lane) simde_vget_lane_s32((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vget_lane_s64(simde_int64x1_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - int64_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - (void) lane; - return vget_lane_s64(v, 0); - #else - simde_int64x1_private v_ = simde_int64x1_to_private(v); - - r = v_.values[lane]; - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_lane_s64 - #define vget_lane_s64(v, lane) simde_vget_lane_s64((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint8_t -simde_vget_lane_u8(simde_uint8x8_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - uint8_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_(vget_lane_u8, r, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v); - #else - simde_uint8x8_private v_ = simde_uint8x8_to_private(v); - - r = v_.values[lane]; - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_lane_u8 - #define vget_lane_u8(v, lane) simde_vget_lane_u8((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vget_lane_u16(simde_uint16x4_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - uint16_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_(vget_lane_u16, r, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v); - #else - simde_uint16x4_private v_ = simde_uint16x4_to_private(v); - - r = v_.values[lane]; - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_lane_u16 - #define vget_lane_u16(v, lane) simde_vget_lane_u16((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vget_lane_u32(simde_uint32x2_t v, const int lane) - 
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - uint32_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_2_(vget_lane_u32, r, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v); - #else - simde_uint32x2_private v_ = simde_uint32x2_to_private(v); - - r = v_.values[lane]; - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_lane_u32 - #define vget_lane_u32(v, lane) simde_vget_lane_u32((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vget_lane_u64(simde_uint64x1_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - uint64_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - (void) lane; - return vget_lane_u64(v, 0); - #else - simde_uint64x1_private v_ = simde_uint64x1_to_private(v); - - r = v_.values[lane]; - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_lane_u64 - #define vget_lane_u64(v, lane) simde_vget_lane_u64((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vgetq_lane_f16(simde_float16x8_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float16_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - SIMDE_CONSTIFY_8_(vgetq_lane_f16, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT16_VALUE(0.0)), lane, v); - #else - simde_float16x8_private v_ = simde_float16x8_to_private(v); - - r = v_.values[lane]; - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_f16 - #define vgetq_lane_f16(v, lane) simde_vgetq_lane_f16((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vgetq_lane_f32(simde_float32x4_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_(vgetq_lane_f32, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v); - #else - simde_float32x4_private v_ = simde_float32x4_to_private(v); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_CONSTIFY_4_(wasm_f32x4_extract_lane, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v_.v128); - #else - r = v_.values[lane]; - #endif - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_f32 - #define vgetq_lane_f32(v, lane) simde_vgetq_lane_f32((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vgetq_lane_f64(simde_float64x2_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float64_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_CONSTIFY_2_(vgetq_lane_f64, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT64_C(0.0)), lane, v); - #else - simde_float64x2_private v_ = simde_float64x2_to_private(v); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_CONSTIFY_2_(wasm_f64x2_extract_lane, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT64_C(0.0)), lane, v_.v128); - #else - r = v_.values[lane]; - #endif - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_f64 - #define vgetq_lane_f64(v, lane) simde_vgetq_lane_f64((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_vgetq_lane_s8(simde_int8x16_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - int8_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_16_(vgetq_lane_s8, r, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v); - #else - simde_int8x16_private v_ = simde_int8x16_to_private(v); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - int r_; - 
SIMDE_CONSTIFY_16_(wasm_i8x16_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v_.v128); - r = HEDLEY_STATIC_CAST(int8_t, r_); - #else - r = v_.values[lane]; - #endif - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_s8 - #define vgetq_lane_s8(v, lane) simde_vgetq_lane_s8((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vgetq_lane_s16(simde_int16x8_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - int16_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_(vgetq_lane_s16, r, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v); - #else - simde_int16x8_private v_ = simde_int16x8_to_private(v); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - int r_; - SIMDE_CONSTIFY_8_(wasm_i16x8_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v_.v128); - r = HEDLEY_STATIC_CAST(int16_t, r_); - #else - r = v_.values[lane]; - #endif - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_s16 - #define vgetq_lane_s16(v, lane) simde_vgetq_lane_s16((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vgetq_lane_s32(simde_int32x4_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - int32_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_(vgetq_lane_s32, r, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v); - #else - simde_int32x4_private v_ = simde_int32x4_to_private(v); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - int r_; - SIMDE_CONSTIFY_4_(wasm_i32x4_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v_.v128); - r = HEDLEY_STATIC_CAST(int32_t, r_); - #else - r = v_.values[lane]; - #endif - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_s32 - #define vgetq_lane_s32(v, lane) simde_vgetq_lane_s32((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vgetq_lane_s64(simde_int64x2_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - int64_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_2_(vgetq_lane_s64, r, (HEDLEY_UNREACHABLE(), INT64_C(0)), lane, v); - #else - simde_int64x2_private v_ = simde_int64x2_to_private(v); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - int64_t r_; - SIMDE_CONSTIFY_2_(wasm_i64x2_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT64_C(0)), lane, v_.v128); - r = HEDLEY_STATIC_CAST(int64_t, r_); - #else - r = v_.values[lane]; - #endif - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_s64 - #define vgetq_lane_s64(v, lane) simde_vgetq_lane_s64((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint8_t -simde_vgetq_lane_u8(simde_uint8x16_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - uint8_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_16_(vgetq_lane_u8, r, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v); - #else - simde_uint8x16_private v_ = simde_uint8x16_to_private(v); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - int r_; - SIMDE_CONSTIFY_16_(wasm_i8x16_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v_.v128); - r = HEDLEY_STATIC_CAST(uint8_t, r_); - #else - r = v_.values[lane]; - #endif - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_u8 - #define vgetq_lane_u8(v, lane) simde_vgetq_lane_u8((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vgetq_lane_u16(simde_uint16x8_t v, const int lane) - 
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - uint16_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_(vgetq_lane_u16, r, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v); - #else - simde_uint16x8_private v_ = simde_uint16x8_to_private(v); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - int r_; - SIMDE_CONSTIFY_8_(wasm_i16x8_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v_.v128); - r = HEDLEY_STATIC_CAST(uint16_t, r_); - #else - r = v_.values[lane]; - #endif - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_u16 - #define vgetq_lane_u16(v, lane) simde_vgetq_lane_u16((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vgetq_lane_u32(simde_uint32x4_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - uint32_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_(vgetq_lane_u32, r, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v); - #else - simde_uint32x4_private v_ = simde_uint32x4_to_private(v); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - int32_t r_; - SIMDE_CONSTIFY_4_(wasm_i32x4_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v_.v128); - r = HEDLEY_STATIC_CAST(uint32_t, r_); - #else - r = v_.values[lane]; - #endif - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_u32 - #define vgetq_lane_u32(v, lane) simde_vgetq_lane_u32((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vgetq_lane_u64(simde_uint64x2_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - uint64_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_2_(vgetq_lane_u64, r, (HEDLEY_UNREACHABLE(), UINT64_C(0)), lane, v); - #else - simde_uint64x2_private v_ = simde_uint64x2_to_private(v); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - int64_t r_; - SIMDE_CONSTIFY_2_(wasm_i64x2_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT64_C(0)), lane, v_.v128); - r = HEDLEY_STATIC_CAST(uint64_t, r_); - #else - r = v_.values[lane]; - #endif - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_u64 - #define vgetq_lane_u64(v, lane) simde_vgetq_lane_u64((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8_t -simde_vget_lane_p8(simde_poly8x8_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_poly8_t r; - simde_poly8x8_private v_ = simde_poly8x8_to_private(v); - r = v_.values[lane]; - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_71362) - #define simde_vget_lane_p8(v, lane) vget_lane_p8((v), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_lane_p8 - #define vget_lane_p8(v, lane) simde_vget_lane_p8((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16_t -simde_vget_lane_p16(simde_poly16x4_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_poly16_t r; - simde_poly16x4_private v_ = simde_poly16x4_to_private(v); - - r = v_.values[lane]; - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_71362) - #define simde_vget_lane_p16(v, lane) vget_lane_p16((v), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vget_lane_p16 - #define vget_lane_p16(v, lane) simde_vget_lane_p16((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64_t -simde_vget_lane_p64(simde_poly64x1_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_poly64_t r; - simde_poly64x1_private 
v_ = simde_poly64x1_to_private(v); - - r = v_.values[lane]; - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_CLANG_71362) - #define simde_vget_lane_p64(v, lane) vget_lane_p64((v), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vget_lane_p64 - #define vget_lane_p64(v, lane) simde_vget_lane_p64((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8_t -simde_vgetq_lane_p8(simde_poly8x16_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - simde_poly8_t r; - simde_poly8x16_private v_ = simde_poly8x16_to_private(v); - - r = v_.values[lane]; - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_71362) - #define simde_vgetq_lane_p8(v, lane) vgetq_lane_p8((v), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_p8 - #define vgetq_lane_p8(v, lane) simde_vgetq_lane_p8((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16_t -simde_vgetq_lane_p16(simde_poly16x8_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_poly16_t r; - simde_poly16x8_private v_ = simde_poly16x8_to_private(v); - - r = v_.values[lane]; - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_71362) - #define simde_vgetq_lane_p16(v, lane) vgetq_lane_p16((v), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_p16 - #define vgetq_lane_p16(v, lane) simde_vgetq_lane_p16((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64_t -simde_vgetq_lane_p64(simde_poly64x2_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_poly64_t r; - simde_poly64x2_private v_ = simde_poly64x2_to_private(v); - - r = v_.values[lane]; - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_CLANG_71362) - #define simde_vgetq_lane_p64(v, lane) vgetq_lane_p64((v), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_p64 - #define vgetq_lane_p64(v, lane) simde_vgetq_lane_p64((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16_t -simde_vget_lane_bf16(simde_bfloat16x4_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_bfloat16_t r; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - SIMDE_CONSTIFY_4_(vget_lane_bf16, r, (HEDLEY_UNREACHABLE(), SIMDE_BFLOAT16_VALUE(0.0)), lane, v); - #else - simde_bfloat16x4_private v_ = simde_bfloat16x4_to_private(v); - - r = v_.values[lane]; - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vget_lane_bf16 - #define vget_lane_bf16(v, lane) simde_vget_lane_bf16((v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16_t -simde_vgetq_lane_bf16(simde_bfloat16x8_t v, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_bfloat16_t r; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - SIMDE_CONSTIFY_8_(vgetq_lane_bf16, r, (HEDLEY_UNREACHABLE(), SIMDE_BFLOAT16_VALUE(0.0)), lane, v); - #else - simde_bfloat16x8_private v_ = simde_bfloat16x8_to_private(v); - - r = v_.values[lane]; - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vgetq_lane_bf16 - #define vgetq_lane_bf16(v, lane) simde_vgetq_lane_bf16((v), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_GET_LANE_H) */ -/* :: End simde/arm/neon/get_lane.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH 
-SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vpaddd_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpaddd_s64(a); - #else - return simde_vaddd_s64(simde_vgetq_lane_s64(a, 0), simde_vgetq_lane_s64(a, 1)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vpaddd_s64 - #define vpaddd_s64(a) simde_vpaddd_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vpaddd_u64(simde_uint64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpaddd_u64(a); - #else - return simde_vaddd_u64(simde_vgetq_lane_u64(a, 0), simde_vgetq_lane_u64(a, 1)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vpaddd_u64 - #define vpaddd_u64(a) simde_vpaddd_u64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vpaddd_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpaddd_f64(a); - #else - simde_float64x2_private a_ = simde_float64x2_to_private(a); - return a_.values[0] + a_.values[1]; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vpaddd_f64 - #define vpaddd_f64(a) simde_vpaddd_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vpadds_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpadds_f32(a); - #else - simde_float32x2_private a_ = simde_float32x2_to_private(a); - return a_.values[0] + a_.values[1]; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vpadds_f32 - #define vpadds_f32(a) simde_vpadds_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vpadd_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) && defined(SIMDE_ARM_NEON_FP16) - return vpadd_f16(a, b); - #else - return simde_vadd_f16(simde_vuzp1_f16(a, b), simde_vuzp2_f16(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vpadd_f16 - #define vpadd_f16(a, b) simde_vpadd_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vpadd_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) - return vpadd_f32(a, b); - #else - return simde_vadd_f32(simde_vuzp1_f32(a, b), simde_vuzp2_f32(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpadd_f32 - #define vpadd_f32(a, b) simde_vpadd_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vpadd_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpadd_s8(a, b); - #else - return simde_vadd_s8(simde_vuzp1_s8(a, b), simde_vuzp2_s8(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpadd_s8 - #define vpadd_s8(a, b) simde_vpadd_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vpadd_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpadd_s16(a, b); - #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return simde_int16x4_from_m64(_mm_hadd_pi16(simde_int16x4_to_m64(a), simde_int16x4_to_m64(b))); - #else - return simde_vadd_s16(simde_vuzp1_s16(a, b), simde_vuzp2_s16(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpadd_s16 - #define vpadd_s16(a, b) simde_vpadd_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t 
-simde_vpadd_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpadd_s32(a, b); - #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return simde_int32x2_from_m64(_mm_hadd_pi32(simde_int32x2_to_m64(a), simde_int32x2_to_m64(b))); - #else - return simde_vadd_s32(simde_vuzp1_s32(a, b), simde_vuzp2_s32(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpadd_s32 - #define vpadd_s32(a, b) simde_vpadd_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vpadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpadd_u8(a, b); - #else - return simde_vadd_u8(simde_vuzp1_u8(a, b), simde_vuzp2_u8(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpadd_u8 - #define vpadd_u8(a, b) simde_vpadd_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vpadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpadd_u16(a, b); - #else - return simde_vadd_u16(simde_vuzp1_u16(a, b), simde_vuzp2_u16(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpadd_u16 - #define vpadd_u16(a, b) simde_vpadd_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vpadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpadd_u32(a, b); - #else - return simde_vadd_u32(simde_vuzp1_u32(a, b), simde_vuzp2_u32(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpadd_u32 - #define vpadd_u32(a, b) simde_vpadd_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vpaddq_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vpaddq_f16(a, b); - #else - return simde_vaddq_f16(simde_vuzp1q_f16(a, b), simde_vuzp2q_f16(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vpaddq_f16 - #define vpaddq_f16(a, b) simde_vpaddq_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vpaddq_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpaddq_f32(a, b); - #elif defined(SIMDE_X86_SSE3_NATIVE) - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - #if defined(SIMDE_X86_SSE3_NATIVE) - r_.m128 = _mm_hadd_ps(a_.m128, b_.m128); - #endif - - return simde_float32x4_from_private(r_); - #else - return simde_vaddq_f32(simde_vuzp1q_f32(a, b), simde_vuzp2q_f32(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddq_f32 - #define vpaddq_f32(a, b) simde_vpaddq_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vpaddq_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpaddq_f64(a, b); - #elif defined(SIMDE_X86_SSE3_NATIVE) - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - #if defined(SIMDE_X86_SSE3_NATIVE) - r_.m128d = _mm_hadd_pd(a_.m128d, b_.m128d); - #endif - - return simde_float64x2_from_private(r_); - #else - return simde_vaddq_f64(simde_vuzp1q_f64(a, b), simde_vuzp2q_f64(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vpaddq_f64 - #define vpaddq_f64(a, b) simde_vpaddq_f64((a), (b)) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vpaddq_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpaddq_s8(a, b); - #else - return simde_vaddq_s8(simde_vuzp1q_s8(a, b), simde_vuzp2q_s8(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddq_s8 - #define vpaddq_s8(a, b) simde_vpaddq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vpaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpaddq_s16(a, b); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i = _mm_hadd_epi16(a_.m128i, b_.m128i); - #endif - - return simde_int16x8_from_private(r_); - #else - return simde_vaddq_s16(simde_vuzp1q_s16(a, b), simde_vuzp2q_s16(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddq_s16 - #define vpaddq_s16(a, b) simde_vpaddq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vpaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpaddq_s32(a, b); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i = _mm_hadd_epi32(a_.m128i, b_.m128i); - #endif - - return simde_int32x4_from_private(r_); - #else - return simde_vaddq_s32(simde_vuzp1q_s32(a, b), simde_vuzp2q_s32(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddq_s32 - #define vpaddq_s32(a, b) simde_vpaddq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vpaddq_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpaddq_s64(a, b); - #else - return simde_vaddq_s64(simde_vuzp1q_s64(a, b), simde_vuzp2q_s64(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddq_s64 - #define vpaddq_s64(a, b) simde_vpaddq_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vpaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpaddq_u8(a, b); - #else - return simde_vaddq_u8(simde_vuzp1q_u8(a, b), simde_vuzp2q_u8(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddq_u8 - #define vpaddq_u8(a, b) simde_vpaddq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vpaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpaddq_u16(a, b); - #else - return simde_vaddq_u16(simde_vuzp1q_u16(a, b), simde_vuzp2q_u16(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddq_u16 - #define vpaddq_u16(a, b) simde_vpaddq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vpaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpaddq_u32(a, b); - #else - return simde_vaddq_u32(simde_vuzp1q_u32(a, b), simde_vuzp2q_u32(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddq_u32 - #define vpaddq_u32(a, b) simde_vpaddq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vpaddq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vpaddq_u64(a, b); - #else - return simde_vaddq_u64(simde_vuzp1q_u64(a, b), simde_vuzp2q_u64(a, b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddq_u64 - #define vpaddq_u64(a, b) simde_vpaddq_u64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_PADD_H) */ -/* :: End simde/arm/neon/padd.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/shl_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_ARM_NEON_SHL_N_H) -#define SIMDE_ARM_NEON_SHL_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vshld_n_s64 (const int64_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { - return HEDLEY_STATIC_CAST(int64_t, a << n); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vshld_n_s64(a, n) vshld_n_s64((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vshld_n_s64 - #define vshld_n_s64(a, n) simde_vshld_n_s64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vshld_n_u64 (const uint64_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { - return HEDLEY_STATIC_CAST(uint64_t, a << n); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vshld_n_u64(a, n) vshld_n_u64((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vshld_n_u64 - #define vshld_n_u64(a, n) simde_vshld_n_u64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vshl_n_s8 (const simde_int8x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = a_.values << HEDLEY_STATIC_CAST(int8_t, n); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i] << n); - } - #endif - - return simde_int8x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshl_n_s8(a, n) vshl_n_s8((a), (n)) -#elif defined(SIMDE_X86_MMX_NATIVE) - #define simde_vshl_n_s8(a, n) \ - simde_int8x8_from_m64(_mm_andnot_si64(_mm_set1_pi8((1 << n) - 1), _mm_slli_si64(simde_int8x8_to_m64(a), (n)))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_n_s8 - #define vshl_n_s8(a, n) simde_vshl_n_s8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vshl_n_s16 (const simde_int16x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << HEDLEY_STATIC_CAST(int16_t, n); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i] << n); - } - #endif - - return simde_int16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshl_n_s16(a, n) vshl_n_s16((a), (n)) -#elif defined(SIMDE_X86_MMX_NATIVE) - #define simde_vshl_n_s16(a, n) simde_int16x4_from_m64(_mm_slli_pi16(simde_int16x4_to_m64(a), (n))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_n_s16 - #define vshl_n_s16(a, n) simde_vshl_n_s16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vshl_n_s32 (const simde_int32x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << n; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int32_t, 
a_.values[i] << n); - } - #endif - - return simde_int32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshl_n_s32(a, n) vshl_n_s32((a), (n)) -#elif defined(SIMDE_X86_MMX_NATIVE) - #define simde_vshl_n_s32(a, n) simde_int32x2_from_m64(_mm_slli_pi32(simde_int32x2_to_m64(a), (n))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_n_s32 - #define vshl_n_s32(a, n) simde_vshl_n_s32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vshl_n_s64 (const simde_int64x1_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << n; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i] << n); - } - #endif - - return simde_int64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshl_n_s64(a, n) vshl_n_s64((a), (n)) -#elif defined(SIMDE_X86_MMX_NATIVE) - #define simde_vshl_n_s64(a, n) simde_int64x1_from_m64(_mm_slli_si64(simde_int64x1_to_m64(a), (n))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_n_s64 - #define vshl_n_s64(a, n) simde_vshl_n_s64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vshl_n_u8 (const simde_uint8x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = a_.values << HEDLEY_STATIC_CAST(uint8_t, n); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, a_.values[i] << n); - } - #endif - - return simde_uint8x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshl_n_u8(a, n) vshl_n_u8((a), (n)) -#elif defined(SIMDE_X86_MMX_NATIVE) - #define simde_vshl_n_u8(a, n) \ - simde_uint8x8_from_m64(_mm_andnot_si64(_mm_set1_pi8((1 << n) - 1), _mm_slli_si64(simde_uint8x8_to_m64(a), (n)))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_n_u8 - #define vshl_n_u8(a, n) simde_vshl_n_u8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vshl_n_u16 (const simde_uint16x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << HEDLEY_STATIC_CAST(uint16_t, n); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i] << n); - } - #endif - - return simde_uint16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshl_n_u16(a, n) vshl_n_u16((a), (n)) -#elif defined(SIMDE_X86_MMX_NATIVE) - #define simde_vshl_n_u16(a, n) simde_uint16x4_from_m64(_mm_slli_pi16(simde_uint16x4_to_m64(a), (n))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_n_u16 - #define vshl_n_u16(a, n) simde_vshl_n_u16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vshl_n_u32 (const simde_uint32x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a); - - #if 
defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << n; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i] << n); - } - #endif - - return simde_uint32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshl_n_u32(a, n) vshl_n_u32((a), (n)) -#elif defined(SIMDE_X86_MMX_NATIVE) - #define simde_vshl_n_u32(a, n) simde_uint32x2_from_m64(_mm_slli_pi32(simde_uint32x2_to_m64(a), (n))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_n_u32 - #define vshl_n_u32(a, n) simde_vshl_n_u32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vshl_n_u64 (const simde_uint64x1_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << n; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i] << n); - } - #endif - - return simde_uint64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshl_n_u64(a, n) vshl_n_u64((a), (n)) -#elif defined(SIMDE_X86_MMX_NATIVE) - #define simde_vshl_n_u64(a, n) simde_uint64x1_from_m64(_mm_slli_si64(simde_uint64x1_to_m64(a), (n))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_n_u64 - #define vshl_n_u64(a, n) simde_vshl_n_u64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vshlq_n_s8 (const simde_int8x16_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a); - - #if defined(SIMDE_X86_GFNI_NATIVE) - /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ - r_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set1_epi64x(INT64_C(0x0102040810204080) >> (n * 8)), 0); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << n) - 1)), _mm_slli_epi64(a_.m128i, n)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << HEDLEY_STATIC_CAST(int8_t, n); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i] << n); - } - #endif - - return simde_int8x16_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshlq_n_s8(a, n) vshlq_n_s8((a), (n)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_vshlq_n_s8(a, n) (vec_sl((a), vec_splats(SIMDE_CHECKED_STATIC_CAST(unsigned char, int, (n))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_n_s8 - #define vshlq_n_s8(a, n) simde_vshlq_n_s8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vshlq_n_s16 (const simde_int16x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_slli_epi16(a_.m128i, (n)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << HEDLEY_STATIC_CAST(int16_t, n); - #else - 
SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i] << n); - } - #endif - - return simde_int16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshlq_n_s16(a, n) vshlq_n_s16((a), (n)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_vshlq_n_s16(a, n) (vec_sl((a), vec_splats(SIMDE_CHECKED_STATIC_CAST(unsigned short, int, (n))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_n_s16 - #define vshlq_n_s16(a, n) simde_vshlq_n_s16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vshlq_n_s32 (const simde_int32x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_slli_epi32(a_.m128i, (n)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << n; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i] << n); - } - #endif - - return simde_int32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshlq_n_s32(a, n) vshlq_n_s32((a), (n)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_vshlq_n_s32(a, n) (vec_sl((a), vec_splats(HEDLEY_STATIC_CAST(unsigned int, (n))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_n_s32 - #define vshlq_n_s32(a, n) simde_vshlq_n_s32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vshlq_n_s64 (const simde_int64x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_slli_epi64(a_.m128i, (n)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i64x2_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << n; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i] << n); - } - #endif - - return simde_int64x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshlq_n_s64(a, n) vshlq_n_s64((a), (n)) -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - #define simde_vshlq_n_s64(a, n) (vec_sl((a), vec_splats(HEDLEY_STATIC_CAST(unsigned long long, (n))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_n_s64 - #define vshlq_n_s64(a, n) simde_vshlq_n_s64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vshlq_n_u8 (const simde_uint8x16_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a); - - #if defined(SIMDE_X86_GFNI_NATIVE) - /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ - r_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set1_epi64x(INT64_C(0x0102040810204080) >> (n * 8)), 0); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_andnot_si128(_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << n) - 1)), _mm_slli_epi64(a_.m128i, (n))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, 
n)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << HEDLEY_STATIC_CAST(uint8_t, n); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, a_.values[i] << n); - } - #endif - - return simde_uint8x16_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshlq_n_u8(a, n) vshlq_n_u8((a), (n)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_vshlq_n_u8(a, n) (vec_sl((a), vec_splat_u8(n))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_n_u8 - #define vshlq_n_u8(a, n) simde_vshlq_n_u8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vshlq_n_u16 (const simde_uint16x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_slli_epi16(a_.m128i, (n)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << HEDLEY_STATIC_CAST(uint16_t, n); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i] << n); - } - #endif - - return simde_uint16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshlq_n_u16(a, n) vshlq_n_u16((a), (n)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_vshlq_n_u16(a, n) (vec_sl((a), vec_splat_u16(n))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_n_u16 - #define vshlq_n_u16(a, n) simde_vshlq_n_u16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vshlq_n_u32 (const simde_uint32x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_slli_epi32(a_.m128i, (n)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << n; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i] << n); - } - #endif - - return simde_uint32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshlq_n_u32(a, n) vshlq_n_u32((a), (n)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_vshlq_n_u32(a, n) (vec_sl((a), vec_splats(HEDLEY_STATIC_CAST(unsigned int, (n))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_n_u32 - #define vshlq_n_u32(a, n) simde_vshlq_n_u32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vshlq_n_u64 (const simde_uint64x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_slli_epi64(a_.m128i, (n)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i64x2_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values << n; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = 
HEDLEY_STATIC_CAST(uint64_t, a_.values[i] << n); - } - #endif - - return simde_uint64x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshlq_n_u64(a, n) vshlq_n_u64((a), (n)) -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - #define simde_vshlq_n_u64(a, n) (vec_sl((a), vec_splats(HEDLEY_STATIC_CAST(unsigned long long, (n))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_n_u64 - #define vshlq_n_u64(a, n) simde_vshlq_n_u64((a), (n)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_SHL_N_H) */ -/* :: End simde/arm/neon/shl_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vpaddl_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpaddl_s8(a); - #else - simde_int16x8_t tmp = simde_vmovl_s8(a); - return simde_vpadd_s16(simde_vget_low_s16(tmp), simde_vget_high_s16(tmp)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddl_s8 - #define vpaddl_s8(a) simde_vpaddl_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vpaddl_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpaddl_s16(a); - #else - simde_int32x4_t tmp = simde_vmovl_s16(a); - return simde_vpadd_s32(simde_vget_low_s32(tmp), simde_vget_high_s32(tmp)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddl_s16 - #define vpaddl_s16(a) simde_vpaddl_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vpaddl_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpaddl_s32(a); - #else - simde_int64x2_t tmp = simde_vmovl_s32(a); - return simde_vadd_s64(simde_vget_low_s64(tmp), simde_vget_high_s64(tmp)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddl_s32 - #define vpaddl_s32(a) simde_vpaddl_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vpaddl_u8(simde_uint8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpaddl_u8(a); - #else - simde_uint16x8_t tmp = simde_vmovl_u8(a); - return simde_vpadd_u16(simde_vget_low_u16(tmp), simde_vget_high_u16(tmp)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddl_u8 - #define vpaddl_u8(a) simde_vpaddl_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vpaddl_u16(simde_uint16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpaddl_u16(a); - #else - simde_uint32x4_t tmp = simde_vmovl_u16(a); - return simde_vpadd_u32(simde_vget_low_u32(tmp), simde_vget_high_u32(tmp)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddl_u16 - #define vpaddl_u16(a) simde_vpaddl_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vpaddl_u32(simde_uint32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpaddl_u32(a); - #else - simde_uint64x2_t tmp = simde_vmovl_u32(a); - return simde_vadd_u64(simde_vget_low_u64(tmp), simde_vget_high_u64(tmp)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddl_u32 - #define vpaddl_u32(a) simde_vpaddl_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vpaddlq_s8(simde_int8x16_t a) { - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpaddlq_s8(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed char) one = vec_splat_s8(1); - return - vec_add( - vec_mule(a, one), - vec_mulo(a, one) - ); - #elif \ - defined(SIMDE_X86_XOP_NATIVE) || \ - defined(SIMDE_X86_SSSE3_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) - simde_int8x16_private a_ = simde_int8x16_to_private(a); - simde_int16x8_private r_; - - #if defined(SIMDE_X86_XOP_NATIVE) - r_.m128i = _mm_haddw_epi8(a_.m128i); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i = _mm_maddubs_epi16(_mm_set1_epi8(INT8_C(1)), a_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_extadd_pairwise_i8x16(a_.v128); - #endif - - return simde_int16x8_from_private(r_); - #else - simde_int16x8_t lo = simde_vshrq_n_s16(simde_vshlq_n_s16(simde_vreinterpretq_s16_s8(a), 8), 8); - simde_int16x8_t hi = simde_vshrq_n_s16(simde_vreinterpretq_s16_s8(a), 8); - return simde_vaddq_s16(lo, hi); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddlq_s8 - #define vpaddlq_s8(a) simde_vpaddlq_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vpaddlq_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpaddlq_s16(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed short) one = vec_splat_s16(1); - return - vec_add( - vec_mule(a, one), - vec_mulo(a, one) - ); - #elif \ - defined(SIMDE_X86_XOP_NATIVE) || \ - defined(SIMDE_X86_SSE2_NATIVE) - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_int32x4_private r_; - - #if defined(SIMDE_X86_XOP_NATIVE) - r_.m128i = _mm_haddd_epi16(a_.m128i); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_madd_epi16(a_.m128i, _mm_set1_epi16(INT8_C(1))); - #endif - - return simde_int32x4_from_private(r_); - #else - simde_int32x4_t lo = simde_vshrq_n_s32(simde_vshlq_n_s32(simde_vreinterpretq_s32_s16(a), 16), 16); - simde_int32x4_t hi = simde_vshrq_n_s32(simde_vreinterpretq_s32_s16(a), 16); - return simde_vaddq_s32(lo, hi); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddlq_s16 - #define vpaddlq_s16(a) simde_vpaddlq_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vpaddlq_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpaddlq_s32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(int) one = vec_splat_s32(1); - return - vec_add( - vec_mule(a, one), - vec_mulo(a, one) - ); - #else - simde_int64x2_t lo = simde_vshrq_n_s64(simde_vshlq_n_s64(simde_vreinterpretq_s64_s32(a), 32), 32); - simde_int64x2_t hi = simde_vshrq_n_s64(simde_vreinterpretq_s64_s32(a), 32); - return simde_vaddq_s64(lo, hi); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddlq_s32 - #define vpaddlq_s32(a) simde_vpaddlq_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vpaddlq_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpaddlq_u8(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) one = vec_splat_u8(1); - return - vec_add( - vec_mule(a, one), - vec_mulo(a, one) - ); - #elif \ - defined(SIMDE_X86_XOP_NATIVE) || \ - defined(SIMDE_X86_SSSE3_NATIVE) - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - simde_uint16x8_private r_; - - #if defined(SIMDE_X86_XOP_NATIVE) - r_.m128i = _mm_haddw_epu8(a_.m128i); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - 
r_.m128i = _mm_maddubs_epi16(a_.m128i, _mm_set1_epi8(INT8_C(1))); - #endif - - return simde_uint16x8_from_private(r_); - #else - simde_uint16x8_t lo = simde_vshrq_n_u16(simde_vshlq_n_u16(simde_vreinterpretq_u16_u8(a), 8), 8); - simde_uint16x8_t hi = simde_vshrq_n_u16(simde_vreinterpretq_u16_u8(a), 8); - return simde_vaddq_u16(lo, hi); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddlq_u8 - #define vpaddlq_u8(a) simde_vpaddlq_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vpaddlq_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpaddlq_u16(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) one = vec_splat_u16(1); - return - vec_add( - vec_mule(a, one), - vec_mulo(a, one) - ); - #elif \ - defined(SIMDE_X86_XOP_NATIVE) || \ - defined(SIMDE_X86_SSSE3_NATIVE) - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - simde_uint32x4_private r_; - - #if defined(SIMDE_X86_XOP_NATIVE) - r_.sse_m128i = _mm_haddd_epu16(a_.sse_m128i); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = - _mm_add_epi32( - _mm_srli_epi32(a_.m128i, 16), - _mm_and_si128(a_.m128i, _mm_set1_epi32(INT32_C(0x0000ffff))) - ); - #endif - - return simde_uint32x4_from_private(r_); - #else - simde_uint32x4_t lo = simde_vshrq_n_u32(simde_vshlq_n_u32(simde_vreinterpretq_u32_u16(a), 16), 16); - simde_uint32x4_t hi = simde_vshrq_n_u32(simde_vreinterpretq_u32_u16(a), 16); - return simde_vaddq_u32(lo, hi); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddlq_u16 - #define vpaddlq_u16(a) simde_vpaddlq_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vpaddlq_u32(simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vpaddlq_u32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) one = vec_splat_u32(1); - return - vec_add( - vec_mule(a, one), - vec_mulo(a, one) - ); - #elif defined(SIMDE_X86_SSE2_NATIVE) - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - simde_uint64x2_private r_; - - r_.m128i = - _mm_add_epi64( - _mm_srli_epi64(a_.m128i, 32), - _mm_and_si128(a_.m128i, _mm_set1_epi64x(INT64_C(0x00000000ffffffff))) - ); - - return simde_uint64x2_from_private(r_); - #else - simde_uint64x2_t lo = simde_vshrq_n_u64(simde_vshlq_n_u64(simde_vreinterpretq_u64_u32(a), 32), 32); - simde_uint64x2_t hi = simde_vshrq_n_u64(simde_vreinterpretq_u64_u32(a), 32); - return simde_vaddq_u64(lo, hi); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vpaddlq_u32 - #define vpaddlq_u32(a) simde_vpaddlq_u32((a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_NEON_PADDL_H */ -/* :: End simde/arm/neon/paddl.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mull.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The 
above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_MULL_H) -#define SIMDE_ARM_NEON_MULL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vmull_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmull_s8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vmulq_s16(simde_vmovl_s8(a), simde_vmovl_s8(b)); - #else - simde_int16x8_private r_; - simde_int8x8_private - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761) - __typeof__(r_.values) av, bv; - SIMDE_CONVERT_VECTOR_(av, a_.values); - SIMDE_CONVERT_VECTOR_(bv, b_.values); - r_.values = av * bv; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) * HEDLEY_STATIC_CAST(int16_t, b_.values[i]); - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmull_s8 - #define vmull_s8(a, b) simde_vmull_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vmull_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmull_s16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vmulq_s32(simde_vmovl_s16(a), simde_vmovl_s16(b)); - #else - simde_int32x4_private r_; - simde_int16x4_private - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761) - __typeof__(r_.values) av, bv; - SIMDE_CONVERT_VECTOR_(av, a_.values); - SIMDE_CONVERT_VECTOR_(bv, b_.values); - r_.values = av * bv; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) * HEDLEY_STATIC_CAST(int32_t, b_.values[i]); - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmull_s16 - #define vmull_s16(a, b) simde_vmull_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vmull_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmull_s32(a, b); - #else - 
simde_int64x2_private r_; - simde_int32x2_private - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - __typeof__(r_.values) av, bv; - SIMDE_CONVERT_VECTOR_(av, a_.values); - SIMDE_CONVERT_VECTOR_(bv, b_.values); - r_.values = av * bv; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) * HEDLEY_STATIC_CAST(int64_t, b_.values[i]); - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmull_s32 - #define vmull_s32(a, b) simde_vmull_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vmull_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmull_u8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vmulq_u16(simde_vmovl_u8(a), simde_vmovl_u8(b)); - #else - simde_uint16x8_private r_; - simde_uint8x8_private - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761) - __typeof__(r_.values) av, bv; - SIMDE_CONVERT_VECTOR_(av, a_.values); - SIMDE_CONVERT_VECTOR_(bv, b_.values); - r_.values = av * bv; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint16_t, b_.values[i]); - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmull_u8 - #define vmull_u8(a, b) simde_vmull_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vmull_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmull_u16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vmulq_u32(simde_vmovl_u16(a), simde_vmovl_u16(b)); - #else - simde_uint32x4_private r_; - simde_uint16x4_private - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761) - __typeof__(r_.values) av, bv; - SIMDE_CONVERT_VECTOR_(av, a_.values); - SIMDE_CONVERT_VECTOR_(bv, b_.values); - r_.values = av * bv; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[i]); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmull_u16 - #define vmull_u16(a, b) simde_vmull_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vmull_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmull_u32(a, b); - #else - simde_uint64x2_private r_; - simde_uint32x2_private - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - __typeof__(r_.values) av, bv; - SIMDE_CONVERT_VECTOR_(av, a_.values); - SIMDE_CONVERT_VECTOR_(bv, b_.values); - r_.values = av * bv; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = 
HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint64_t, b_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmull_u32 - #define vmull_u32(a, b) simde_vmull_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vmull_p8(simde_poly8x8_t a, simde_poly8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmull_p8(a, b); - #else - simde_uint8x8_private - a_ = simde_uint8x8_to_private(simde_vreinterpret_u8_p8(a)), - b_ = simde_uint8x8_to_private(simde_vreinterpret_u8_p8(b)); - simde_uint16x8_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - uint16_t extend_op2 = HEDLEY_STATIC_CAST(uint16_t, b_.values[i]); - uint16_t result = 0; - for(size_t j = 0; j < 8; ++j) { - if (a_.values[i] & (1 << j)) { - result = HEDLEY_STATIC_CAST(uint16_t, result ^ (extend_op2 << j)); - } - } - r_.values[i] = result; - } - - return simde_vreinterpretq_p16_u16(simde_uint16x8_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmull_p8 - #define vmull_p8(a, b) simde_vmull_p8((a), (b)) -#endif - -#if !defined(SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE) -SIMDE_FUNCTION_ATTRIBUTES -simde_poly128_t -simde_vmull_p64(simde_poly64_t a, simde_poly64_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) - return vmull_p64(a, b); - #else - simde_poly128_t extend_op2 = HEDLEY_STATIC_CAST(simde_poly128_t, b); - simde_poly128_t result = 0; - SIMDE_VECTORIZE - for(size_t j = 0; j < 64; ++j) { - if (a & (1ull << j)) { - result = result ^ (extend_op2 << j); - } - } - return result; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vmull_p64 - #define vmull_p64(a, b) simde_vmull_p64((a), (b)) -#endif - -#endif /* !defined(SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MULL_H) */ -/* :: End simde/arm/neon/mull.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vdot_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) - return vdot_s32(r, a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde_vadd_s32(r, simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(a, b))))); - #else - simde_int32x2_private r_; - simde_int8x8_private - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - for (int i = 0 ; i < 2 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]); - } - r_.values[i] = acc; - } - return simde_vadd_s32(r, simde_int32x2_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdot_s32 - #define vdot_s32(r, a, b) simde_vdot_s32((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vdot_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) - return vdot_u32(r, a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde_vadd_u32(r, simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(a, b))))); - #else - 
simde_uint32x2_private r_; - simde_uint8x8_private - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - for (int i = 0 ; i < 2 ; i++) { - uint32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx = j + (i << 2); - acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx]); - } - r_.values[i] = acc; - } - return simde_vadd_u32(r, simde_uint32x2_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdot_u32 - #define vdot_u32(r, a, b) simde_vdot_u32((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vdotq_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) - return vdotq_s32(r, a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde_vaddq_s32(r, - simde_vcombine_s32(simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(simde_vget_low_s8(a), simde_vget_low_s8(b))))), - simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(simde_vget_high_s8(a), simde_vget_high_s8(b))))))); - #else - simde_int32x4_private r_; - simde_int8x16_private - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - for (int i = 0 ; i < 4 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]); - } - r_.values[i] = acc; - } - return simde_vaddq_s32(r, simde_int32x4_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdotq_s32 - #define vdotq_s32(r, a, b) simde_vdotq_s32((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vdotq_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) - return vdotq_u32(r, a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde_vaddq_u32(r, - simde_vcombine_u32(simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(simde_vget_low_u8(a), simde_vget_low_u8(b))))), - simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(simde_vget_high_u8(a), simde_vget_high_u8(b))))))); - #else - simde_uint32x4_private r_; - simde_uint8x16_private - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - for (int i = 0 ; i < 4 ; i++) { - uint32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx = j + (i << 2); - acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx]); - } - r_.values[i] = acc; - } - return simde_vaddq_u32(r, simde_uint32x4_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdotq_u32 - #define vdotq_u32(r, a, b) simde_vdotq_u32((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vbfdot_f32(simde_float32x2_t r, simde_bfloat16x4_t a, simde_bfloat16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - defined(SIMDE_ARM_NEON_BF16) - return vbfdot_f32(r, a, b); - #else - simde_float32x2_private r_ = simde_float32x2_to_private(r); - simde_bfloat16x4_private - a_ = simde_bfloat16x4_to_private(a), - b_ = simde_bfloat16x4_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - simde_float32_t elt1_a = 
simde_bfloat16_to_float32(a_.values[2 * i + 0]); - simde_float32_t elt1_b = simde_bfloat16_to_float32(a_.values[2 * i + 1]); - simde_float32_t elt2_a = simde_bfloat16_to_float32(b_.values[2 * i + 0]); - simde_float32_t elt2_b = simde_bfloat16_to_float32(b_.values[2 * i + 1]); - r_.values[i] = r_.values[i] + elt1_a * elt2_a + elt1_b * elt2_b; - } - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbfdot_f32 - #define vbfdot_f32(r, a, b) simde_vbfdot_f32((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vbfdotq_f32(simde_float32x4_t r, simde_bfloat16x8_t a, simde_bfloat16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) && \ - defined(SIMDE_ARM_NEON_BF16) - return vbfdotq_f32(r, a, b); - #else - simde_float32x4_private r_ = simde_float32x4_to_private(r); - simde_bfloat16x8_private - a_ = simde_bfloat16x8_to_private(a), - b_ = simde_bfloat16x8_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - simde_float32_t elt1_a = simde_bfloat16_to_float32(a_.values[2 * i + 0]); - simde_float32_t elt1_b = simde_bfloat16_to_float32(a_.values[2 * i + 1]); - simde_float32_t elt2_a = simde_bfloat16_to_float32(b_.values[2 * i + 0]); - simde_float32_t elt2_b = simde_bfloat16_to_float32(b_.values[2 * i + 1]); - r_.values[i] = r_.values[i] + elt1_a * elt2_a + elt1_b * elt2_b; - } - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbfdotq_f32 - #define vbfdotq_f32(r, a, b) simde_vbfdotq_f32((r), (a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_DOT_H) */ -/* :: End simde/arm/neon/dot.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/dot_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - */ - -#if !defined(SIMDE_ARM_NEON_DOT_LANE_H) -#define SIMDE_ARM_NEON_DOT_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vdot_lane_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int32x2_t result; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) - SIMDE_CONSTIFY_2_(vdot_lane_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_int32x2_t - b_lane, - b_32 = vreinterpret_s32_s8(b); - - SIMDE_CONSTIFY_2_(vdup_lane_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); - result = - vadd_s32( - r, - vmovn_s64( - vpaddlq_s32( - vpaddlq_s16( - vmull_s8(a, vreinterpret_s8_s32(b_lane)) - ) - ) - ) - ); - #else - simde_int32x2_private r_ = simde_int32x2_to_private(r); - simde_int8x8_private - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - for (int i = 0 ; i < 2 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_int32x2_from_private(r_); - #endif - - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdot_lane_s32 - #define vdot_lane_s32(r, a, b, lane) simde_vdot_lane_s32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vdot_lane_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_uint32x2_t result; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) - SIMDE_CONSTIFY_2_(vdot_lane_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_uint32x2_t - b_lane, - b_32 = vreinterpret_u32_u8(b); - - SIMDE_CONSTIFY_2_(vdup_lane_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); - result = - vadd_u32( - r, - vmovn_u64( - vpaddlq_u32( - vpaddlq_u16( - vmull_u8(a, vreinterpret_u8_u32(b_lane)) - ) - ) - ) - ); - #else - simde_uint32x2_private r_ = simde_uint32x2_to_private(r); - simde_uint8x8_private - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - for (int i = 0 ; i < 2 ; i++) { - uint32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_uint32x2_from_private(r_); - 
#endif - - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdot_lane_u32 - #define vdot_lane_u32(r, a, b, lane) simde_vdot_lane_u32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vdot_laneq_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x16_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int32x2_t result; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) - SIMDE_CONSTIFY_4_(vdot_laneq_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_int32x2_t b_lane; - simde_int32x4_t b_32 = vreinterpretq_s32_s8(b); - - SIMDE_CONSTIFY_4_(simde_vdup_laneq_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); - result = - vadd_s32( - r, - vmovn_s64( - vpaddlq_s32( - vpaddlq_s16( - vmull_s8(a, vreinterpret_s8_s32(b_lane)) - ) - ) - ) - ); - #else - simde_int32x2_private r_ = simde_int32x2_to_private(r); - simde_int8x8_private a_ = simde_int8x8_to_private(a); - simde_int8x16_private b_ = simde_int8x16_to_private(b); - - for (int i = 0 ; i < 2 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_int32x2_from_private(r_); - #endif - - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdot_laneq_s32 - #define vdot_laneq_s32(r, a, b, lane) simde_vdot_laneq_s32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vdot_laneq_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x16_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_uint32x2_t result; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) - SIMDE_CONSTIFY_4_(vdot_laneq_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_uint32x2_t b_lane; - simde_uint32x4_t b_32 = vreinterpretq_u32_u8(b); - - SIMDE_CONSTIFY_4_(simde_vdup_laneq_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); - result = - vadd_u32( - r, - vmovn_u64( - vpaddlq_u32( - vpaddlq_u16( - vmull_u8(a, vreinterpret_u8_u32(b_lane)) - ) - ) - ) - ); - #else - simde_uint32x2_private r_ = simde_uint32x2_to_private(r); - simde_uint8x8_private a_ = simde_uint8x8_to_private(a); - simde_uint8x16_private b_ = simde_uint8x16_to_private(b); - - for (int i = 0 ; i < 2 ; i++) { - uint32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_uint32x2_from_private(r_); - #endif - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdot_laneq_u32 - #define vdot_laneq_u32(r, a, b, lane) simde_vdot_laneq_u32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vdotq_laneq_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x16_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_uint32x4_t result; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) - SIMDE_CONSTIFY_4_(vdotq_laneq_u32, result, (HEDLEY_UNREACHABLE(), 
result), lane, r, a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_uint32x4_t - b_lane, - b_32 = vreinterpretq_u32_u8(b); - SIMDE_CONSTIFY_4_(simde_vdupq_laneq_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); - - result = - vcombine_u32( - vadd_u32( - vget_low_u32(r), - vmovn_u64( - vpaddlq_u32( - vpaddlq_u16( - vmull_u8(vget_low_u8(a), vget_low_u8(vreinterpretq_u8_u32(b_lane))) - ) - ) - ) - ), - vadd_u32( - vget_high_u32(r), - vmovn_u64( - vpaddlq_u32( - vpaddlq_u16( - vmull_u8(vget_high_u8(a), vget_high_u8(vreinterpretq_u8_u32(b_lane))) - ) - ) - ) - ) - ); - #else - simde_uint32x4_private r_ = simde_uint32x4_to_private(r); - simde_uint8x16_private - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - for(int i = 0 ; i < 4 ; i++) { - uint32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for(int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_uint32x4_from_private(r_); - #endif - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdotq_laneq_u32 - #define vdotq_laneq_u32(r, a, b, lane) simde_vdotq_laneq_u32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vdotq_laneq_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x16_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int32x4_t result; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) - SIMDE_CONSTIFY_4_(vdotq_laneq_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_int32x4_t - b_lane, - b_32 = vreinterpretq_s32_s8(b); - SIMDE_CONSTIFY_4_(simde_vdupq_laneq_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); - - result = - vcombine_s32( - vadd_s32( - vget_low_s32(r), - vmovn_s64( - vpaddlq_s32( - vpaddlq_s16( - vmull_s8(vget_low_s8(a), vget_low_s8(vreinterpretq_s8_s32(b_lane))) - ) - ) - ) - ), - vadd_s32( - vget_high_s32(r), - vmovn_s64( - vpaddlq_s32( - vpaddlq_s16( - vmull_s8(vget_high_s8(a), vget_high_s8(vreinterpretq_s8_s32(b_lane))) - ) - ) - ) - ) - ); - #else - simde_int32x4_private r_ = simde_int32x4_to_private(r); - simde_int8x16_private - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - for(int i = 0 ; i < 4 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for(int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_int32x4_from_private(r_); - #endif - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdotq_laneq_s32 - #define vdotq_laneq_s32(r, a, b, lane) simde_vdotq_laneq_s32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vdotq_lane_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_uint32x4_t result; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) - SIMDE_CONSTIFY_2_(vdotq_lane_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_uint32x2_t - b_lane, - b_32 = vreinterpret_u32_u8(b); - SIMDE_CONSTIFY_2_(simde_vdup_lane_u32, 
b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); - - result = - vcombine_u32( - vadd_u32( - vget_low_u32(r), - vmovn_u64( - vpaddlq_u32( - vpaddlq_u16( - vmull_u8(vget_low_u8(a), vreinterpret_u8_u32(b_lane)) - ) - ) - ) - ), - vadd_u32( - vget_high_u32(r), - vmovn_u64( - vpaddlq_u32( - vpaddlq_u16( - vmull_u8(vget_high_u8(a), vreinterpret_u8_u32(b_lane)) - ) - ) - ) - ) - ); - #else - simde_uint32x4_private r_ = simde_uint32x4_to_private(r); - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - simde_uint8x8_private b_ = simde_uint8x8_to_private(b); - - for(int i = 0 ; i < 4 ; i++) { - uint32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for(int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_uint32x4_from_private(r_); - #endif - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdotq_lane_u32 - #define vdotq_lane_u32(r, a, b, lane) simde_vdotq_lane_u32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vdotq_lane_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int32x4_t result; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) - SIMDE_CONSTIFY_2_(vdotq_lane_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_int32x2_t - b_lane, - b_32 = vreinterpret_s32_s8(b); - SIMDE_CONSTIFY_2_(simde_vdup_lane_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); - - result = - vcombine_s32( - vadd_s32( - vget_low_s32(r), - vmovn_s64( - vpaddlq_s32( - vpaddlq_s16( - vmull_s8(vget_low_s8(a), vreinterpret_s8_s32(b_lane)) - ) - ) - ) - ), - vadd_s32( - vget_high_s32(r), - vmovn_s64( - vpaddlq_s32( - vpaddlq_s16( - vmull_s8(vget_high_s8(a), vreinterpret_s8_s32(b_lane)) - ) - ) - ) - ) - ); - #else - simde_int32x4_private r_ = simde_int32x4_to_private(r); - simde_int8x16_private a_ = simde_int8x16_to_private(a); - simde_int8x8_private b_ = simde_int8x8_to_private(b); - - for(int i = 0 ; i < 4 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for(int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_int32x4_from_private(r_); - #endif - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vdotq_lane_s32 - #define vdotq_lane_s32(r, a, b, lane) simde_vdotq_lane_s32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vbfdot_lane_f32(simde_float32x2_t r, simde_bfloat16x4_t a, simde_bfloat16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) && \ - defined(SIMDE_ARM_NEON_BF16) - SIMDE_CONSTIFY_2_(vbfdot_lane_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float32x2_private r_ = simde_float32x2_to_private(r); - simde_bfloat16x4_private - a_ = simde_bfloat16x4_to_private(a), - b_ = simde_bfloat16x4_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - simde_float32_t elt1_a = 
simde_bfloat16_to_float32(a_.values[2 * i + 0]); - simde_float32_t elt1_b = simde_bfloat16_to_float32(a_.values[2 * i + 1]); - simde_float32_t elt2_a = simde_bfloat16_to_float32(b_.values[2 * lane + 0]); - simde_float32_t elt2_b = simde_bfloat16_to_float32(b_.values[2 * lane + 1]); - r_.values[i] = r_.values[i] + elt1_a * elt2_a + elt1_b * elt2_b; - } - - result = simde_float32x2_from_private(r_); - #endif - - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbfdot_lane_f32 - #define vbfdot_lane_f32(r, a, b, lane) simde_vbfdot_lane_f32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vbfdotq_lane_f32(simde_float32x4_t r, simde_bfloat16x8_t a, simde_bfloat16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) && \ - defined(SIMDE_ARM_NEON_BF16) - SIMDE_CONSTIFY_2_(vbfdotq_lane_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float32x4_private r_ = simde_float32x4_to_private(r); - simde_bfloat16x8_private a_ = simde_bfloat16x8_to_private(a); - simde_bfloat16x4_private b_ = simde_bfloat16x4_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - simde_float32_t elt1_a = simde_bfloat16_to_float32(a_.values[2 * i + 0]); - simde_float32_t elt1_b = simde_bfloat16_to_float32(a_.values[2 * i + 1]); - simde_float32_t elt2_a = simde_bfloat16_to_float32(b_.values[2 * lane + 0]); - simde_float32_t elt2_b = simde_bfloat16_to_float32(b_.values[2 * lane + 1]); - r_.values[i] = r_.values[i] + elt1_a * elt2_a + elt1_b * elt2_b; - } - - result = simde_float32x4_from_private(r_); - #endif - - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbfdotq_lane_f32 - #define vbfdotq_lane_f32(r, a, b, lane) simde_vbfdotq_lane_f32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vbfdot_laneq_f32(simde_float32x2_t r, simde_bfloat16x4_t a, simde_bfloat16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x2_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) && \ - defined(SIMDE_ARM_NEON_BF16) - SIMDE_CONSTIFY_4_(vbfdot_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float32x2_private r_ = simde_float32x2_to_private(r); - simde_bfloat16x4_private a_ = simde_bfloat16x4_to_private(a); - simde_bfloat16x8_private b_ = simde_bfloat16x8_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - simde_float32_t elt1_a = simde_bfloat16_to_float32(a_.values[2 * i + 0]); - simde_float32_t elt1_b = simde_bfloat16_to_float32(a_.values[2 * i + 1]); - simde_float32_t elt2_a = simde_bfloat16_to_float32(b_.values[2 * lane + 0]); - simde_float32_t elt2_b = simde_bfloat16_to_float32(b_.values[2 * lane + 1]); - r_.values[i] = r_.values[i] + elt1_a * elt2_a + elt1_b * elt2_b; - } - - result = simde_float32x2_from_private(r_); - #endif - - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbfdot_laneq_f32 - #define vbfdot_laneq_f32(r, a, b, lane) simde_vbfdot_laneq_f32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vbfdotq_laneq_f32(simde_float32x4_t r, simde_bfloat16x8_t a, simde_bfloat16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_t result; - #if 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) && \ - defined(SIMDE_ARM_NEON_BF16) - SIMDE_CONSTIFY_4_(vbfdotq_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float32x4_private r_ = simde_float32x4_to_private(r); - simde_bfloat16x8_private - a_ = simde_bfloat16x8_to_private(a), - b_ = simde_bfloat16x8_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - simde_float32_t elt1_a = simde_bfloat16_to_float32(a_.values[2 * i + 0]); - simde_float32_t elt1_b = simde_bfloat16_to_float32(a_.values[2 * i + 1]); - simde_float32_t elt2_a = simde_bfloat16_to_float32(b_.values[2 * lane + 0]); - simde_float32_t elt2_b = simde_bfloat16_to_float32(b_.values[2 * lane + 1]); - r_.values[i] = r_.values[i] + elt1_a * elt2_a + elt1_b * elt2_b; - } - - result = simde_float32x4_from_private(r_); - #endif - - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbfdotq_laneq_f32 - #define vbfdotq_laneq_f32(r, a, b, lane) simde_vbfdotq_laneq_f32((r), (a), (b), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_DOT_LANE_H) */ -/* :: End simde/arm/neon/dot_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ext.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_EXT_H) -#define SIMDE_ARM_NEON_EXT_H -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vext_f16(simde_float16x4_t a, simde_float16x4_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - simde_float16x4_t r; - SIMDE_CONSTIFY_4_(vext_f16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_float16x4_private - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3]; - } - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vext_f16 - #define vext_f16(a, b, n) simde_vext_f16((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vext_f32(simde_float32x2_t a, simde_float32x2_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_float32x2_t r; - SIMDE_CONSTIFY_2_(vext_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_float32x2_private - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 1]; - } - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vext_f32(a, b, n) simde_float32x2_from_m64(_mm_alignr_pi8(simde_float32x2_to_m64(b), simde_float32x2_to_m64(a), n * sizeof(simde_float32))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) - #define simde_vext_f32(a, b, n) (__extension__ ({ \ - simde_float32x2_private simde_vext_f32_r_; \ - simde_vext_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_float32x2_to_private(a).values, simde_float32x2_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ - simde_float32x2_from_private(simde_vext_f32_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vext_f32 - #define vext_f32(a, b, n) simde_vext_f32((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vext_f64(simde_float64x1_t a, simde_float64x1_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - (void) n; - return vext_f64(a, b, 0); - #else - simde_float64x1_private - a_ = simde_float64x1_to_private(a), - b_ = simde_float64x1_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0]; - } - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vext_f64(a, b, n) simde_float64x1_from_m64(_mm_alignr_pi8(simde_float64x1_to_m64(b), simde_float64x1_to_m64(a), n * sizeof(simde_float64))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) - #define simde_vext_f64(a, b, n) (__extension__ ({ \ - simde_float64x1_private simde_vext_f64_r_; \ - simde_vext_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_float64x1_to_private(a).values, simde_float64x1_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, (n))); \ - simde_float64x1_from_private(simde_vext_f64_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vext_f64 - #define vext_f64(a, b, n) simde_vext_f64((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vext_s8(simde_int8x8_t a, simde_int8x8_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_int8x8_t r; - SIMDE_CONSTIFY_8_(vext_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_int8x8_private - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 7]; - } - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vext_s8(a, b, n) simde_int8x8_from_m64(_mm_alignr_pi8(simde_int8x8_to_m64(b), simde_int8x8_to_m64(a), n * sizeof(int8_t))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) - #define simde_vext_s8(a, b, n) (__extension__ ({ \ - simde_int8x8_private simde_vext_s8_r_; \ - simde_vext_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_int8x8_to_private(a).values, simde_int8x8_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \ - simde_int8x8_from_private(simde_vext_s8_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vext_s8 - #define vext_s8(a, b, n) simde_vext_s8((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vext_s16(simde_int16x4_t a, simde_int16x4_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_int16x4_t r; - SIMDE_CONSTIFY_4_(vext_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_int16x4_private - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 3]; - } - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vext_s16(a, b, n) simde_int16x4_from_m64(_mm_alignr_pi8(simde_int16x4_to_m64(b), simde_int16x4_to_m64(a), n * sizeof(int16_t))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) - #define simde_vext_s16(a, b, n) (__extension__ ({ \ - simde_int16x4_private simde_vext_s16_r_; \ - simde_vext_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_int16x4_to_private(a).values, simde_int16x4_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ - simde_int16x4_from_private(simde_vext_s16_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vext_s16 - #define vext_s16(a, b, n) simde_vext_s16((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vext_s32(simde_int32x2_t a, simde_int32x2_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_int32x2_t r; - SIMDE_CONSTIFY_2_(vext_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_int32x2_private - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1]; - } - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vext_s32(a, b, n) simde_int32x2_from_m64(_mm_alignr_pi8(simde_int32x2_to_m64(b), simde_int32x2_to_m64(a), n * sizeof(int32_t))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) - #define simde_vext_s32(a, b, n) (__extension__ ({ \ - simde_int32x2_private simde_vext_s32_r_; \ - simde_vext_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_int32x2_to_private(a).values, simde_int32x2_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ - simde_int32x2_from_private(simde_vext_s32_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vext_s32 - #define vext_s32(a, b, n) simde_vext_s32((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vext_s64(simde_int64x1_t a, simde_int64x1_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - (void) n; - return vext_s64(a, b, 0); - #else - simde_int64x1_private - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 0]; - } - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vext_s64(a, b, n) simde_int64x1_from_m64(_mm_alignr_pi8(simde_int64x1_to_m64(b), simde_int64x1_to_m64(a), n * sizeof(int64_t))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) - #define simde_vext_s64(a, b, n) (__extension__ ({ \ - simde_int64x1_private simde_vext_s64_r_; \ - simde_vext_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_int64x1_to_private(a).values, simde_int64x1_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0))); \ - simde_int64x1_from_private(simde_vext_s64_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vext_s64 - #define vext_s64(a, b, n) simde_vext_s64((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vext_u8(simde_uint8x8_t a, simde_uint8x8_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_uint8x8_t r; - SIMDE_CONSTIFY_8_(vext_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_uint8x8_private - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7]; - } - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vext_u8(a, b, n) simde_uint8x8_from_m64(_mm_alignr_pi8(simde_uint8x8_to_m64(b), simde_uint8x8_to_m64(a), n * sizeof(uint8_t))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) - #define simde_vext_u8(a, b, n) (__extension__ ({ \ - simde_uint8x8_private simde_vext_u8_r_; \ - simde_vext_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_uint8x8_to_private(a).values, simde_uint8x8_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \ - simde_uint8x8_from_private(simde_vext_u8_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vext_u8 - #define vext_u8(a, b, n) simde_vext_u8((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vext_u16(simde_uint16x4_t a, simde_uint16x4_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_uint16x4_t r; - SIMDE_CONSTIFY_4_(vext_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_uint16x4_private - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 3]; - } - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vext_u16(a, b, n) simde_uint16x4_from_m64(_mm_alignr_pi8(simde_uint16x4_to_m64(b), simde_uint16x4_to_m64(a), n * sizeof(uint16_t))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) - #define simde_vext_u16(a, b, n) (__extension__ ({ \ - simde_uint16x4_private simde_vext_u16_r_; \ - simde_vext_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_uint16x4_to_private(a).values, simde_uint16x4_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ - simde_uint16x4_from_private(simde_vext_u16_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vext_u16 - #define vext_u16(a, b, n) simde_vext_u16((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vext_u32(simde_uint32x2_t a, simde_uint32x2_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_uint32x2_t r; - SIMDE_CONSTIFY_2_(vext_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_uint32x2_private - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1]; - } - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vext_u32(a, b, n) simde_uint32x2_from_m64(_mm_alignr_pi8(simde_uint32x2_to_m64(b), simde_uint32x2_to_m64(a), n * sizeof(uint32_t))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) - #define simde_vext_u32(a, b, n) (__extension__ ({ \ - simde_uint32x2_private simde_vext_u32_r_; \ - simde_vext_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_uint32x2_to_private(a).values, simde_uint32x2_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ - simde_uint32x2_from_private(simde_vext_u32_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vext_u32 - #define vext_u32(a, b, n) simde_vext_u32((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vext_u64(simde_uint64x1_t a, simde_uint64x1_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - (void) n; - return vext_u64(a, b, 0); - #else - simde_uint64x1_private - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 0]; - } - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vext_u64(a, b, n) simde_uint64x1_from_m64(_mm_alignr_pi8(simde_uint64x1_to_m64(b), simde_uint64x1_to_m64(a), n * sizeof(uint64_t))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) - #define simde_vext_u64(a, b, n) (__extension__ ({ \ - simde_uint64x1_private simde_vext_u64_r_; \ - simde_vext_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_uint64x1_to_private(a).values, simde_uint64x1_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0))); \ - simde_uint64x1_from_private(simde_vext_u64_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vext_u64 - #define vext_u64(a, b, n) simde_vext_u64((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vextq_f16(simde_float16x8_t a, simde_float16x8_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - simde_float16x8_t r; - SIMDE_CONSTIFY_8_(vextq_f16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_float16x8_private - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7]; - } - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vextq_f16 - #define vextq_f16(a, b, n) simde_vextq_f16((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vextq_f32(simde_float32x4_t a, simde_float32x4_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_float32x4_t r; - SIMDE_CONSTIFY_4_(vextq_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_float32x4_private - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 3]; - } - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vextq_f32(a, b, n) simde_float32x4_from_m128(_mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(simde_float32x4_to_m128(b)), _mm_castps_si128(simde_float32x4_to_m128(a)), (n) * sizeof(simde_float32)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_vextq_f32(a, b, n) (__extension__ ({ \ - simde_float32x4_private simde_vextq_f32_r_; \ - simde_vextq_f32_r_.v128 = wasm_i32x4_shuffle(simde_float32x4_to_private(a).v128, simde_float32x4_to_private(b).v128, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ - simde_float32x4_from_private(simde_vextq_f32_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) - #define simde_vextq_f32(a, b, n) (__extension__ ({ \ - simde_float32x4_private simde_vextq_f32_r_; \ - simde_vextq_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_float32x4_to_private(a).values, simde_float32x4_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ - simde_float32x4_from_private(simde_vextq_f32_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vextq_f32 - #define vextq_f32(a, b, n) simde_vextq_f32((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vextq_f64(simde_float64x2_t a, simde_float64x2_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - simde_float64x2_t r; - SIMDE_CONSTIFY_2_(vextq_f64, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_float64x2_private - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 1]; - } - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vextq_f64(a, b, n) simde_float64x2_from_m128d(_mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(simde_float64x2_to_m128d(b)), _mm_castpd_si128(simde_float64x2_to_m128d(a)), (n) * sizeof(simde_float64)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_vextq_f64(a, b, n) (__extension__ ({ \ - simde_float64x2_private simde_vextq_f64_r_; \ - simde_vextq_f64_r_.v128 = wasm_i64x2_shuffle(simde_float64x2_to_private(a).v128, simde_float64x2_to_private(b).v128, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ - simde_float64x2_from_private(simde_vextq_f64_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) - #define simde_vextq_f64(a, b, n) (__extension__ ({ \ - simde_float64x2_private simde_vextq_f64_r_; \ - simde_vextq_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_float64x2_to_private(a).values, simde_float64x2_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ - simde_float64x2_from_private(simde_vextq_f64_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vextq_f64 - #define vextq_f64(a, b, n) simde_vextq_f64((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vextq_s8(simde_int8x16_t a, simde_int8x16_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_int8x16_t r; - SIMDE_CONSTIFY_16_(vextq_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_int8x16_private - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 15]; - } - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vextq_s8(a, b, n) simde_int8x16_from_m128i(_mm_alignr_epi8(simde_int8x16_to_m128i(b), simde_int8x16_to_m128i(a), n * sizeof(int8_t))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_vextq_s8(a, b, n) (__extension__ ({ \ - simde_int8x16_private simde_vextq_s8_r_; \ - simde_vextq_s8_r_.v128 = wasm_i8x16_shuffle(simde_int8x16_to_private(a).v128, simde_int8x16_to_private(b).v128, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 8)), HEDLEY_STATIC_CAST(int8_t, ((n) + 9)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 10)), HEDLEY_STATIC_CAST(int8_t, ((n) + 11)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 12)), HEDLEY_STATIC_CAST(int8_t, ((n) + 13)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 14)), HEDLEY_STATIC_CAST(int8_t, ((n) + 15))); \ - simde_int8x16_from_private(simde_vextq_s8_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) - #define simde_vextq_s8(a, b, n) (__extension__ ({ \ - simde_int8x16_private simde_vextq_s8_r_; \ - simde_vextq_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_int8x16_to_private(a).values, simde_int8x16_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 8)), HEDLEY_STATIC_CAST(int8_t, ((n) + 9)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 10)), HEDLEY_STATIC_CAST(int8_t, ((n) + 11)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 12)), HEDLEY_STATIC_CAST(int8_t, ((n) + 13)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 14)), HEDLEY_STATIC_CAST(int8_t, ((n) + 15))); \ - simde_int8x16_from_private(simde_vextq_s8_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vextq_s8 - #define vextq_s8(a, b, n) simde_vextq_s8((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vextq_s16(simde_int16x8_t a, simde_int16x8_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_int16x8_t r; - SIMDE_CONSTIFY_8_(vextq_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_int16x8_private - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 7]; - } - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vextq_s16(a, b, n) simde_int16x8_from_m128i(_mm_alignr_epi8(simde_int16x8_to_m128i(b), simde_int16x8_to_m128i(a), n * sizeof(int16_t))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_vextq_s16(a, b, n) (__extension__ ({ \ - simde_int16x8_private simde_vextq_s16_r_; \ - simde_vextq_s16_r_.v128 = wasm_i16x8_shuffle(simde_int16x8_to_private(a).v128, simde_int16x8_to_private(b).v128, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \ - simde_int16x8_from_private(simde_vextq_s16_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) - #define simde_vextq_s16(a, b, n) (__extension__ ({ \ - simde_int16x8_private simde_vextq_s16_r_; \ - simde_vextq_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_int16x8_to_private(a).values, simde_int16x8_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \ - simde_int16x8_from_private(simde_vextq_s16_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vextq_s16 - #define vextq_s16(a, b, n) simde_vextq_s16((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vextq_s32(simde_int32x4_t a, simde_int32x4_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_int32x4_t r; - SIMDE_CONSTIFY_4_(vextq_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_int32x4_private - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 3]; - } - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vextq_s32(a, b, n) simde_int32x4_from_m128i(_mm_alignr_epi8(simde_int32x4_to_m128i(b), simde_int32x4_to_m128i(a), n * sizeof(int32_t))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_vextq_s32(a, b, n) (__extension__ ({ \ - simde_int32x4_private simde_vextq_s32_r_; \ - simde_vextq_s32_r_.v128 = wasm_i32x4_shuffle(simde_int32x4_to_private(a).v128, simde_int32x4_to_private(b).v128, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ - simde_int32x4_from_private(simde_vextq_s32_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) - #define simde_vextq_s32(a, b, n) (__extension__ ({ \ - simde_int32x4_private simde_vextq_s32_r_; \ - simde_vextq_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_int32x4_to_private(a).values, simde_int32x4_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ - simde_int32x4_from_private(simde_vextq_s32_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vextq_s32 - #define vextq_s32(a, b, n) simde_vextq_s32((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vextq_s64(simde_int64x2_t a, simde_int64x2_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_int64x2_t r; - SIMDE_CONSTIFY_2_(vextq_s64, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_int64x2_private - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 1]; - } - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vextq_s64(a, b, n) simde_int64x2_from_m128i(_mm_alignr_epi8(simde_int64x2_to_m128i(b), simde_int64x2_to_m128i(a), n * sizeof(int64_t))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_vextq_s64(a, b, n) (__extension__ ({ \ - simde_int64x2_private simde_vextq_s64_r_; \ - simde_vextq_s64_r_.v128 = wasm_i64x2_shuffle(simde_int64x2_to_private(a).v128, simde_int64x2_to_private(b).v128, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ - simde_int64x2_from_private(simde_vextq_s64_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) - #define simde_vextq_s64(a, b, n) (__extension__ ({ \ - simde_int64x2_private simde_vextq_s64_r_; \ - simde_vextq_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_int64x2_to_private(a).values, simde_int64x2_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ - simde_int64x2_from_private(simde_vextq_s64_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vextq_s64 - #define vextq_s64(a, b, n) simde_vextq_s64((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vextq_u8(simde_uint8x16_t a, simde_uint8x16_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_uint8x16_t r; - SIMDE_CONSTIFY_16_(vextq_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_uint8x16_private - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 15]; - } - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vextq_u8(a, b, n) simde_uint8x16_from_m128i(_mm_alignr_epi8(simde_uint8x16_to_m128i(b), simde_uint8x16_to_m128i(a), n * sizeof(uint8_t))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) - #define simde_vextq_u8(a, b, n) (__extension__ ({ \ - simde_uint8x16_private simde_vextq_u8_r_; \ - simde_vextq_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_uint8x16_to_private(a).values, simde_uint8x16_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 8)), HEDLEY_STATIC_CAST(int8_t, ((n) + 9)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 10)), HEDLEY_STATIC_CAST(int8_t, ((n) + 11)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 12)), HEDLEY_STATIC_CAST(int8_t, ((n) + 13)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 14)), HEDLEY_STATIC_CAST(int8_t, ((n) + 15))); \ - simde_uint8x16_from_private(simde_vextq_u8_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vextq_u8 - #define vextq_u8(a, b, n) simde_vextq_u8((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vextq_u16(simde_uint16x8_t a, simde_uint16x8_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_uint16x8_t r; - SIMDE_CONSTIFY_8_(vextq_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_uint16x8_private - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 7]; - } - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vextq_u16(a, b, n) simde_uint16x8_from_m128i(_mm_alignr_epi8(simde_uint16x8_to_m128i(b), simde_uint16x8_to_m128i(a), n * sizeof(uint16_t))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) - #define simde_vextq_u16(a, b, n) (__extension__ ({ \ - simde_uint16x8_private simde_vextq_u16_r_; \ - simde_vextq_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_uint16x8_to_private(a).values, simde_uint16x8_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \ - simde_uint16x8_from_private(simde_vextq_u16_r_); \ - })) -#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - #define simde_vextq_u16(a, b, n) (__extension__ ({ \ - simde_uint16x8_private r_; \ - r_.values = __builtin_shufflevector( \ - simde_uint16x8_to_private(a).values, \ - simde_uint16x8_to_private(b).values, \ - n + 0, n + 1, n + 2, n + 3, n + 4, n + 5, n + 6, n + 7); \ - simde_uint16x8_from_private(r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vextq_u16 - #define vextq_u16(a, b, n) simde_vextq_u16((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vextq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_uint32x4_t r; - SIMDE_CONSTIFY_4_(vextq_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_uint32x4_private - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 3]; - } - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vextq_u32(a, b, n) simde_uint32x4_from_m128i(_mm_alignr_epi8(simde_uint32x4_to_m128i(b), simde_uint32x4_to_m128i(a), n * sizeof(uint32_t))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) - #define simde_vextq_u32(a, b, n) (__extension__ ({ \ - simde_uint32x4_private simde_vextq_u32_r_; \ - simde_vextq_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_uint32x4_to_private(a).values, simde_uint32x4_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ - simde_uint32x4_from_private(simde_vextq_u32_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vextq_u32 - #define vextq_u32(a, b, n) simde_vextq_u32((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vextq_u64(simde_uint64x2_t a, simde_uint64x2_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_uint64x2_t r; - SIMDE_CONSTIFY_2_(vextq_u64, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_uint64x2_private - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1]; - } - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) - #define simde_vextq_u64(a, b, n) simde_uint64x2_from_m128i(_mm_alignr_epi8(simde_uint64x2_to_m128i(b), simde_uint64x2_to_m128i(a), n * sizeof(uint64_t))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) - #define simde_vextq_u64(a, b, n) (__extension__ ({ \ - simde_uint64x2_private simde_vextq_u64_r_; \ - simde_vextq_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_uint64x2_to_private(a).values, simde_uint64x2_to_private(b).values, \ - HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ - simde_uint64x2_from_private(simde_vextq_u64_r_); \ - })) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vextq_u64 - #define vextq_u64(a, b, n) simde_vextq_u64((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vext_p8(simde_poly8x8_t a, simde_poly8x8_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_poly8x8_t r; - SIMDE_CONSTIFY_8_(vext_p8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_poly8x8_private - a_ = simde_poly8x8_to_private(a), - b_ = simde_poly8x8_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 7]; - } - return simde_poly8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vext_p8 - #define vext_p8(a, b, n) simde_vext_p8((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vext_p16(simde_poly16x4_t a, simde_poly16x4_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_poly16x4_t r; - SIMDE_CONSTIFY_4_(vext_p16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_poly16x4_private - a_ = simde_poly16x4_to_private(a), - b_ = simde_poly16x4_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3]; - } - return simde_poly16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vext_p16 - #define vext_p16(a, b, n) simde_vext_p16((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t -simde_vext_p64(simde_poly64x1_t a, simde_poly64x1_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - (void) n; - return vext_p64(a, b, 0); - #else - simde_poly64x1_private - a_ = simde_poly64x1_to_private(a), - b_ = simde_poly64x1_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0]; - } - return simde_poly64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vext_p64 - #define vext_p64(a, b, n) simde_vext_p64((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vextq_p8(simde_poly8x16_t a, simde_poly8x16_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_poly8x16_t r; - SIMDE_CONSTIFY_16_(vextq_p8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_poly8x16_private - a_ = simde_poly8x16_to_private(a), - b_ = simde_poly8x16_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 15]; - } - return simde_poly8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vextq_p8 - #define vextq_p8(a, b, n) simde_vextq_p8((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vextq_p16(simde_poly16x8_t a, simde_poly16x8_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde_poly16x8_t r; - SIMDE_CONSTIFY_8_(vextq_p16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_poly16x8_private - a_ = simde_poly16x8_to_private(a), - b_ = simde_poly16x8_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 7]; - } - return simde_poly16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vextq_p16 - #define vextq_p16(a, b, n) simde_vextq_p16((a), (b), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vextq_p64(simde_poly64x2_t a, simde_poly64x2_t b, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - simde_poly64x2_t r; - SIMDE_CONSTIFY_2_(vextq_p64, r, (HEDLEY_UNREACHABLE(), a), n, a, b); - return r; - #else - simde_poly64x2_private - a_ = simde_poly64x2_to_private(a), - b_ = simde_poly64x2_to_private(b), - r_ = a_; - const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1]; - } - return simde_poly64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vextq_p64 - #define vextq_p64(a, b, n) simde_vextq_p64((a), (b), (n)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_EXT_H) */ -/* :: End simde/arm/neon/ext.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/fma.h :: */ -/* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, copy, -* modify, merge, publish, distribute, sublicense, and/or sell copies -* of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-* -* Copyright: -* 2021 Atharva Nimbalkar -* 2023 Yi-Yen Chung (Copyright owned by Andes Technology) -*/ - -#if !defined(SIMDE_ARM_NEON_FMA_H) -#define SIMDE_ARM_NEON_FMA_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vfmah_f16(simde_float16_t a, simde_float16_t b, simde_float16_t c) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) - return vfmah_f16(a, b, c); - #else - return simde_vaddh_f16(a, simde_vmulh_f16(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmah_f16 - #define vfmah_f16(a, b, c) simde_vfmah_f16(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfma_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - return vfma_f32(a, b, c); - #else - return simde_vadd_f32(a, simde_vmul_f32(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vfma_f32 - #define vfma_f32(a, b, c) simde_vfma_f32(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vfma_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - return vfma_f64(a, b, c); - #else - return simde_vadd_f64(a, simde_vmul_f64(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vfma_f64 - #define vfma_f64(a, b, c) simde_vfma_f64(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vfma_f16(simde_float16x4_t a, simde_float16x4_t b, simde_float16x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) - return vfma_f16(a, b, c); - #else - return simde_vadd_f16(a, simde_vmul_f16(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfma_f16 - #define vfma_f16(a, b, c) simde_vfma_f16(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vfmaq_f16(simde_float16x8_t a, simde_float16x8_t b, simde_float16x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) - return vfmaq_f16(a, b, c); - #else - return simde_vaddq_f16(a, simde_vmulq_f16(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmaq_f16 - #define vfmaq_f16(a, b, c) simde_vfmaq_f16(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmaq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - return vfmaq_f32(a, b, c); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_madd(b, c, a); - #elif \ - defined(SIMDE_X86_FMA_NATIVE) - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b), - c_ = simde_float32x4_to_private(c); - - #if defined(SIMDE_X86_FMA_NATIVE) - r_.m128 = _mm_fmadd_ps(b_.m128, c_.m128, a_.m128); - #endif - - return simde_float32x4_from_private(r_); - #else - return simde_vaddq_f32(a, simde_vmulq_f32(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vfmaq_f32 - #define vfmaq_f32(a, b, 
c) simde_vfmaq_f32(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vfmaq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - return vfmaq_f64(a, b, c); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_madd(b, c, a); - #elif \ - defined(SIMDE_X86_FMA_NATIVE) - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b), - c_ = simde_float64x2_to_private(c); - - #if defined(SIMDE_X86_FMA_NATIVE) - r_.m128d = _mm_fmadd_pd(b_.m128d, c_.m128d, a_.m128d); - #endif - - return simde_float64x2_from_private(r_); - #else - return simde_vaddq_f64(a, simde_vmulq_f64(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vfmaq_f64 - #define vfmaq_f64(a, b, c) simde_vfmaq_f64(a, b, c) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CMLA_H) */ -/* :: End simde/arm/neon/fma.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/fma_lane.h :: */ -/* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, copy, -* modify, merge, publish, distribute, sublicense, and/or sell copies -* of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-* -* Copyright: -* 2021 Atharva Nimbalkar -* 2023 Yi-Yen Chung (Copyright owned by Andes Technology) -*/ - -#if !defined(SIMDE_ARM_NEON_FMA_LANE_H) -#define SIMDE_ARM_NEON_FMA_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mul_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_MUL_LANE_H) -#define SIMDE_ARM_NEON_MUL_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vmulh_lane_f16(simde_float16_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vmulh_f16(a, simde_float16x4_to_private(b).values[lane]); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vmulh_lane_f16(a, b, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmulh_lane_f16(a, b, lane)) - #else - #define simde_vmulh_lane_f16(a, b, lane) vmulh_lane_f16((a), (b), (lane)) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulh_lane_f16 - #define vmulh_lane_f16(a, b, lane) simde_vmulh_lane_f16(a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vmuld_lane_f64(simde_float64_t a, simde_float64x1_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - return a * simde_float64x1_to_private(b).values[lane]; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vmuld_lane_f64(a, b, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuld_lane_f64(a, b, lane)) - #else - #define simde_vmuld_lane_f64(a, b, lane) vmuld_lane_f64((a), (b), (lane)) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmuld_lane_f64 - #define vmuld_lane_f64(a, b, lane) simde_vmuld_lane_f64(a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vmuld_laneq_f64(simde_float64_t a, simde_float64x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - return a * simde_float64x2_to_private(b).values[lane]; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vmuld_laneq_f64(a, b, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuld_laneq_f64(a, b, lane)) - #else - #define simde_vmuld_laneq_f64(a, b, lane) vmuld_laneq_f64((a), (b), (lane)) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmuld_laneq_f64 - #define vmuld_laneq_f64(a, b, lane) simde_vmuld_laneq_f64(a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vmuls_lane_f32(simde_float32_t a, simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - return a * simde_float32x2_to_private(b).values[lane]; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vmuls_lane_f32(a, b, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuls_lane_f32(a, b, lane)) - #else - #define simde_vmuls_lane_f32(a, b, lane) vmuls_lane_f32((a), (b), (lane)) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmuls_lane_f32 - #define vmuls_lane_f32(a, b, lane) simde_vmuls_lane_f32(a, b, lane) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vmulh_laneq_f16(simde_float16_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_vmulh_f16(a, simde_float16x8_to_private(b).values[lane]); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vmulh_laneq_f16(a, b, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmulh_laneq_f16(a, b, lane)) - #else - #define simde_vmulh_laneq_f16(a, b, lane) vmulh_laneq_f16((a), (b), (lane)) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulh_laneq_f16 - #define vmulh_laneq_f16(a, b, lane) simde_vmulh_laneq_f16(a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vmuls_laneq_f32(simde_float32_t a, simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return a * simde_float32x4_to_private(b).values[lane]; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vmuls_laneq_f32(a, b, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuls_laneq_f32(a, b, lane)) - #else - #define simde_vmuls_laneq_f32(a, b, lane) vmuls_laneq_f32((a), (b), (lane)) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmuls_laneq_f32 - #define vmuls_laneq_f32(a, b, lane) simde_vmuls_laneq_f32(a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vmul_lane_f16(simde_float16x4_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vmulh_f16(a_.values[i], b_.values[lane]); - } - - return simde_float16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vmul_lane_f16(a, b, lane) vmul_lane_f16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vmul_lane_f16 - #define vmul_lane_f16(a, b, lane) simde_vmul_lane_f16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vmul_lane_f32(simde_float32x2_t a, simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmul_lane_f32(a, b, lane) vmul_lane_f32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmul_lane_f32 - #define vmul_lane_f32(a, b, lane) simde_vmul_lane_f32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vmul_lane_f64(simde_float64x1_t a, simde_float64x1_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a), - b_ = simde_float64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * 
b_.values[lane]; - } - - return simde_float64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmul_lane_f64(a, b, lane) vmul_lane_f64((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmul_lane_f64 - #define vmul_lane_f64(a, b, lane) simde_vmul_lane_f64((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vmul_lane_s16(simde_int16x4_t a, simde_int16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_int16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmul_lane_s16(a, b, lane) vmul_lane_s16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmul_lane_s16 - #define vmul_lane_s16(a, b, lane) simde_vmul_lane_s16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vmul_lane_s32(simde_int32x2_t a, simde_int32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_int32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmul_lane_s32(a, b, lane) vmul_lane_s32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmul_lane_s32 - #define vmul_lane_s32(a, b, lane) simde_vmul_lane_s32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vmul_lane_u16(simde_uint16x4_t a, simde_uint16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_uint16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmul_lane_u16(a, b, lane) vmul_lane_u16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmul_lane_u16 - #define vmul_lane_u16(a, b, lane) simde_vmul_lane_u16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vmul_lane_u32(simde_uint32x2_t a, simde_uint32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_uint32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmul_lane_u32(a, b, lane) vmul_lane_u32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmul_lane_u32 - #define vmul_lane_u32(a, b, lane) simde_vmul_lane_u32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vmul_laneq_s16(simde_int16x4_t a, simde_int16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - 
simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a); - simde_int16x8_private - b_ = simde_int16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_int16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmul_laneq_s16(a, b, lane) vmul_laneq_s16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmul_laneq_s16 - #define vmul_laneq_s16(a, b, lane) simde_vmul_laneq_s16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vmul_laneq_s32(simde_int32x2_t a, simde_int32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a); - simde_int32x4_private - b_ = simde_int32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_int32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmul_laneq_s32(a, b, lane) vmul_laneq_s32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmul_laneq_s32 - #define vmul_laneq_s32(a, b, lane) simde_vmul_laneq_s32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vmul_laneq_u16(simde_uint16x4_t a, simde_uint16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a); - simde_uint16x8_private - b_ = simde_uint16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_uint16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmul_laneq_u16(a, b, lane) vmul_laneq_u16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmul_laneq_u16 - #define vmul_laneq_u16(a, b, lane) simde_vmul_laneq_u16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vmul_laneq_u32(simde_uint32x2_t a, simde_uint32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a); - simde_uint32x4_private - b_ = simde_uint32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_uint32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmul_laneq_u32(a, b, lane) vmul_laneq_u32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmul_laneq_u32 - #define vmul_laneq_u32(a, b, lane) simde_vmul_laneq_u32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vmulq_lane_f16(simde_float16x8_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a); - simde_float16x4_private b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vmulh_f16(a_.values[i], b_.values[lane]); - } - - return simde_float16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && 
defined(SIMDE_ARM_NEON_FP16) - #define simde_vmulq_lane_f16(a, b, lane) vmulq_lane_f16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vmulq_lane_f16 - #define vmulq_lane_f16(a, b, lane) simde_vmulq_lane_f16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vmulq_lane_f32(simde_float32x4_t a, simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - simde_float32x2_private b_ = simde_float32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmulq_lane_f32(a, b, lane) vmulq_lane_f32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmulq_lane_f32 - #define vmulq_lane_f32(a, b, lane) simde_vmulq_lane_f32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vmulq_lane_f64(simde_float64x2_t a, simde_float64x1_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - simde_float64x1_private b_ = simde_float64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float64x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulq_lane_f64(a, b, lane) vmulq_lane_f64((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulq_lane_f64 - #define vmulq_lane_f64(a, b, lane) simde_vmulq_lane_f64((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vmulq_lane_s16(simde_int16x8_t a, simde_int16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a); - simde_int16x4_private b_ = simde_int16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_int16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmulq_lane_s16(a, b, lane) vmulq_lane_s16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmulq_lane_s16 - #define vmulq_lane_s16(a, b, lane) simde_vmulq_lane_s16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vmulq_lane_s32(simde_int32x4_t a, simde_int32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a); - simde_int32x2_private b_ = simde_int32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_int32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmulq_lane_s32(a, b, lane) vmulq_lane_s32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmulq_lane_s32 - #define vmulq_lane_s32(a, b, lane) simde_vmulq_lane_s32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vmulq_lane_u16(simde_uint16x8_t a, simde_uint16x4_t b, const int lane) - 
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a); - simde_uint16x4_private b_ = simde_uint16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_uint16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmulq_lane_u16(a, b, lane) vmulq_lane_u16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmulq_lane_u16 - #define vmulq_lane_u16(a, b, lane) simde_vmulq_lane_u16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vmulq_lane_u32(simde_uint32x4_t a, simde_uint32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a); - simde_uint32x2_private b_ = simde_uint32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_uint32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmulq_lane_u32(a, b, lane) vmulq_lane_u32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmulq_lane_u32 - #define vmulq_lane_u32(a, b, lane) simde_vmulq_lane_u32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vmulq_laneq_f16(simde_float16x8_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vmulh_f16(a_.values[i], b_.values[lane]); - } - - return simde_float16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vmulq_laneq_f16(a, b, lane) vmulq_laneq_f16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulq_laneq_f16 - #define vmulq_laneq_f16(a, b, lane) simde_vmulq_laneq_f16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vmulq_laneq_f32(simde_float32x4_t a, simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulq_laneq_f32(a, b, lane) vmulq_laneq_f32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulq_laneq_f32 - #define vmulq_laneq_f32(a, b, lane) simde_vmulq_laneq_f32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vmulq_laneq_f64(simde_float64x2_t a, simde_float64x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float64x2_from_private(r_); -} -#if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulq_laneq_f64(a, b, lane) vmulq_laneq_f64((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulq_laneq_f64 - #define vmulq_laneq_f64(a, b, lane) simde_vmulq_laneq_f64((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vmulq_laneq_s16(simde_int16x8_t a, simde_int16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_int16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulq_laneq_s16(a, b, lane) vmulq_laneq_s16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulq_laneq_s16 - #define vmulq_laneq_s16(a, b, lane) simde_vmulq_laneq_s16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vmulq_laneq_s32(simde_int32x4_t a, simde_int32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_int32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulq_laneq_s32(a, b, lane) vmulq_laneq_s32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulq_laneq_s32 - #define vmulq_laneq_s32(a, b, lane) simde_vmulq_laneq_s32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vmulq_laneq_u16(simde_uint16x8_t a, simde_uint16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_uint16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulq_laneq_u16(a, b, lane) vmulq_laneq_u16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulq_laneq_u16 - #define vmulq_laneq_u16(a, b, lane) simde_vmulq_laneq_u16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vmulq_laneq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_uint32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulq_laneq_u32(a, b, lane) vmulq_laneq_u32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulq_laneq_u32 - #define vmulq_laneq_u32(a, b, lane) simde_vmulq_laneq_u32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vmul_laneq_f16(simde_float16x4_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - 
simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a); - simde_float16x8_private b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vmulh_f16(a_.values[i], b_.values[lane]); - } - - return simde_float16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vmul_laneq_f16(a, b, lane) vmul_laneq_f16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmul_laneq_f16 - #define vmul_laneq_f16(a, b, lane) simde_vmul_laneq_f16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vmul_laneq_f32(simde_float32x2_t a, simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - simde_float32x4_private b_ = simde_float32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmul_laneq_f32(a, b, lane) vmul_laneq_f32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmul_laneq_f32 - #define vmul_laneq_f32(a, b, lane) simde_vmul_laneq_f32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vmul_laneq_f64(simde_float64x1_t a, simde_float64x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - simde_float64x2_private b_ = simde_float64x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmul_laneq_f64(a, b, lane) vmul_laneq_f64((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmul_laneq_f64 - #define vmul_laneq_f64(a, b, lane) simde_vmul_laneq_f64((a), (b), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MUL_LANE_H) */ -/* :: End simde/arm/neon/mul_lane.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* simde_vfmad_lane_f64 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vfmad_lane_f64(a, b, v, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmad_lane_f64(a, b, v, lane)) - #else - #define simde_vfmad_lane_f64(a, b, v, lane) vfmad_lane_f64((a), (b), (v), (lane)) - #endif -#else - #define simde_vfmad_lane_f64(a, b, v, lane) \ - simde_vget_lane_f64( \ - simde_vadd_f64( \ - simde_vdup_n_f64(a), \ - simde_vdup_n_f64(simde_vmuld_lane_f64(b, v, lane)) \ - ), \ - 0 \ - ) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfmad_lane_f64 - #define vfmad_lane_f64(a, b, v, lane) simde_vfmad_lane_f64(a, b, v, lane) -#endif - -/* simde_vfmad_laneq_f64 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vfmad_laneq_f64(a, b, v, lane) \ - 
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmad_laneq_f64(a, b, v, lane)) - #else - #define simde_vfmad_laneq_f64(a, b, v, lane) vfmad_laneq_f64((a), (b), (v), (lane)) - #endif -#else - #define simde_vfmad_laneq_f64(a, b, v, lane) \ - simde_vget_lane_f64( \ - simde_vadd_f64( \ - simde_vdup_n_f64(a), \ - simde_vdup_n_f64(simde_vmuld_laneq_f64(b, v, lane)) \ - ), \ - 0 \ - ) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfmad_laneq_f64 - #define vfmad_laneq_f64(a, b, v, lane) simde_vfmad_laneq_f64(a, b, v, lane) -#endif - -/* simde_vfmah_lane_f16 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vfmah_lane_f16(a, b, v, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmah_lane_f16(a, b, v, lane)) - #else - #define simde_vfmah_lane_f16(a, b, v, lane) vfmah_lane_f16((a), (b), (v), (lane)) - #endif -#else - #define simde_vfmah_lane_f16(a, b, v, lane) \ - simde_vget_lane_f16( \ - simde_vadd_f16( \ - simde_vdup_n_f16(a), \ - simde_vdup_n_f16(simde_vmulh_lane_f16(b, v, lane)) \ - ), \ - 0 \ - ) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfmah_lane_f16 - #define vfmah_lane_f16(a, b, v, lane) simde_vfmah_lane_f16(a, b, v, lane) -#endif - -/* simde_vfmah_laneq_f16 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vfmah_laneq_f16(a, b, v, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmah_laneq_f16(a, b, v, lane)) - #else - #define simde_vfmah_laneq_f16(a, b, v, lane) vfmah_laneq_f16((a), (b), (v), (lane)) - #endif -#else - #define simde_vfmah_laneq_f16(a, b, v, lane) \ - simde_vget_lane_f16( \ - simde_vadd_f16( \ - simde_vdup_n_f16(a), \ - simde_vdup_n_f16(simde_vmulh_laneq_f16(b, v, lane)) \ - ), \ - 0 \ - ) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfmah_laneq_f16 - #define vfmah_laneq_f16(a, b, v, lane) simde_vfmah_laneq_f16(a, b, v, lane) -#endif - -/* simde_vfmas_lane_f32 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vfmas_lane_f32(a, b, v, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmas_lane_f32(a, b, v, lane)) - #else - #define simde_vfmas_lane_f32(a, b, v, lane) vfmas_lane_f32((a), (b), (v), (lane)) - #endif -#else - #define simde_vfmas_lane_f32(a, b, v, lane) \ - simde_vget_lane_f32( \ - simde_vadd_f32( \ - simde_vdup_n_f32(a), \ - simde_vdup_n_f32(simde_vmuls_lane_f32(b, v, lane)) \ - ), \ - 0 \ - ) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfmas_lane_f32 - #define vfmas_lane_f32(a, b, v, lane) simde_vfmas_lane_f32(a, b, v, lane) -#endif - -/* simde_vfmas_laneq_f32 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vfmas_laneq_f32(a, b, v, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmas_laneq_f32(a, b, v, lane)) - #else - #define simde_vfmas_laneq_f32(a, b, v, lane) vfmas_laneq_f32((a), (b), (v), (lane)) - #endif -#else - #define simde_vfmas_laneq_f32(a, b, v, lane) \ 
- simde_vget_lane_f32( \ - simde_vadd_f32( \ - simde_vdup_n_f32(a), \ - simde_vdup_n_f32(simde_vmuls_laneq_f32(b, v, lane)) \ - ), \ - 0 \ - ) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfmas_laneq_f32 - #define vfmas_laneq_f32(a, b, v, lane) simde_vfmas_laneq_f32(a, b, v, lane) -#endif - -/* simde_vfma_lane_f16 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vfma_lane_f16(a, b, v, lane) vfma_lane_f16(a, b, v, lane) -#else - #define simde_vfma_lane_f16(a, b, v, lane) simde_vadd_f16(a, simde_vmul_lane_f16(b, v, lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfma_lane_f16 - #define vfma_lane_f16(a, b, v, lane) simde_vfma_lane_f16(a, b, v, lane) -#endif - -/* simde_vfma_lane_f32 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - #define simde_vfma_lane_f32(a, b, v, lane) vfma_lane_f32(a, b, v, lane) -#else - #define simde_vfma_lane_f32(a, b, v, lane) simde_vadd_f32(a, simde_vmul_lane_f32(b, v, lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfma_lane_f32 - #define vfma_lane_f32(a, b, v, lane) simde_vfma_lane_f32(a, b, v, lane) -#endif - -/* simde_vfma_lane_f64 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - #define simde_vfma_lane_f64(a, b, v, lane) vfma_lane_f64((a), (b), (v), (lane)) -#else - #define simde_vfma_lane_f64(a, b, v, lane) simde_vadd_f64(a, simde_vmul_lane_f64(b, v, lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfma_lane_f64 - #define vfma_lane_f64(a, b, v, lane) simde_vfma_lane_f64(a, b, v, lane) -#endif - -/* simde_vfma_laneq_f16 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vfma_laneq_f16(a, b, v, lane) vfma_laneq_f16((a), (b), (v), (lane)) -#else - #define simde_vfma_laneq_f16(a, b, v, lane) simde_vadd_f16(a, simde_vmul_laneq_f16(b, v, lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfma_laneq_f16 - #define vfma_laneq_f16(a, b, v, lane) simde_vfma_laneq_f16(a, b, v, lane) -#endif - -/* simde_vfma_laneq_f32 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - #define simde_vfma_laneq_f32(a, b, v, lane) vfma_laneq_f32((a), (b), (v), (lane)) -#else - #define simde_vfma_laneq_f32(a, b, v, lane) simde_vadd_f32(a, simde_vmul_laneq_f32(b, v, lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfma_laneq_f32 - #define vfma_laneq_f32(a, b, v, lane) simde_vfma_laneq_f32(a, b, v, lane) -#endif - -/* simde_vfma_laneq_f64 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - #define simde_vfma_laneq_f64(a, b, v, lane) vfma_laneq_f64((a), (b), (v), (lane)) -#else - #define simde_vfma_laneq_f64(a, b, v, lane) simde_vadd_f64(a, simde_vmul_laneq_f64(b, v, lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfma_laneq_f64 - #define vfma_laneq_f64(a, b, v, lane) simde_vfma_laneq_f64(a, b, v, lane) -#endif - -/* simde_vfmaq_lane_f64 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - #define simde_vfmaq_lane_f64(a, b, v, lane) vfmaq_lane_f64((a), (b), (v), (lane)) -#else - #define simde_vfmaq_lane_f64(a, b, v, lane) simde_vaddq_f64(a, simde_vmulq_lane_f64(b, v, lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfmaq_lane_f64 - #define vfmaq_lane_f64(a, b, v, lane) 
simde_vfmaq_lane_f64(a, b, v, lane) -#endif - -/* simde_vfmaq_lane_f16 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vfmaq_lane_f16(a, b, v, lane) vfmaq_lane_f16((a), (b), (v), (lane)) -#else - #define simde_vfmaq_lane_f16(a, b, v, lane) simde_vaddq_f16(a, simde_vmulq_lane_f16(b, v, lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfmaq_lane_f16 - #define vfmaq_lane_f16(a, b, v, lane) simde_vfmaq_lane_f16(a, b, v, lane) -#endif - -/* simde_vfmaq_lane_f32 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - #define simde_vfmaq_lane_f32(a, b, v, lane) vfmaq_lane_f32((a), (b), (v), (lane)) -#else - #define simde_vfmaq_lane_f32(a, b, v, lane) simde_vaddq_f32(a, simde_vmulq_lane_f32(b, v, lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfmaq_lane_f32 - #define vfmaq_lane_f32(a, b, v, lane) simde_vfmaq_lane_f32(a, b, v, lane) -#endif - -/* simde_vfmaq_laneq_f16 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vfmaq_laneq_f16(a, b, v, lane) vfmaq_laneq_f16((a), (b), (v), (lane)) -#else - #define simde_vfmaq_laneq_f16(a, b, v, lane) \ - simde_vaddq_f16(a, simde_vmulq_laneq_f16(b, v, lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfmaq_laneq_f16 - #define vfmaq_laneq_f16(a, b, v, lane) simde_vfmaq_laneq_f16(a, b, v, lane) -#endif - -/* simde_vfmaq_laneq_f32 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - #define simde_vfmaq_laneq_f32(a, b, v, lane) vfmaq_laneq_f32((a), (b), (v), (lane)) -#else - #define simde_vfmaq_laneq_f32(a, b, v, lane) \ - simde_vaddq_f32(a, simde_vmulq_laneq_f32(b, v, lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfmaq_laneq_f32 - #define vfmaq_laneq_f32(a, b, v, lane) simde_vfmaq_laneq_f32(a, b, v, lane) -#endif - -/* simde_vfmaq_laneq_f64 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - #define simde_vfmaq_laneq_f64(a, b, v, lane) vfmaq_laneq_f64((a), (b), (v), (lane)) -#else - #define simde_vfmaq_laneq_f64(a, b, v, lane) \ - simde_vaddq_f64(a, simde_vmulq_laneq_f64(b, v, lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfmaq_laneq_f64 - #define vfmaq_laneq_f64(a, b, v, lane) simde_vfmaq_laneq_f64(a, b, v, lane) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_FMA_LANE_H) */ -/* :: End simde/arm/neon/fma_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/fma_n.h :: */ -/* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, copy, -* modify, merge, publish, distribute, sublicense, and/or sell copies -* of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. 
-* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -* -* Copyright: -* 2021 Evan Nemerson -* 2023 Yi-Yen Chung (Copyright owned by Andes Technology) -*/ - -#if !defined(SIMDE_ARM_NEON_FMA_N_H) -#define SIMDE_ARM_NEON_FMA_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vfma_n_f16(simde_float16x4_t a, simde_float16x4_t b, simde_float16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) && defined(SIMDE_ARM_NEON_FP16) - return vfma_n_f16(a, b, c); - #else - return simde_vfma_f16(a, b, simde_vdup_n_f16(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfma_n_f16 - #define vfma_n_f16(a, b, c) simde_vfma_n_f16(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vfmaq_n_f16(simde_float16x8_t a, simde_float16x8_t b, simde_float16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) && defined(SIMDE_ARM_NEON_FP16) - return vfmaq_n_f16(a, b, c); - #else - return simde_vfmaq_f16(a, b, simde_vdupq_n_f16(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vfmaq_n_f16 - #define vfmaq_n_f16(a, b, c) simde_vfmaq_n_f16(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfma_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) - return vfma_n_f32(a, b, c); - #else - return simde_vfma_f32(a, b, simde_vdup_n_f32(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vfma_n_f32 - #define vfma_n_f32(a, b, c) simde_vfma_n_f32(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vfma_n_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - return vfma_n_f64(a, b, c); - #else - return simde_vfma_f64(a, b, simde_vdup_n_f64(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vfma_n_f64 - #define vfma_n_f64(a, b, c) simde_vfma_n_f64(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmaq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && 
!defined(SIMDE_BUG_GCC_95399) - return vfmaq_n_f32(a, b, c); - #else - return simde_vfmaq_f32(a, b, simde_vdupq_n_f32(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vfmaq_n_f32 - #define vfmaq_n_f32(a, b, c) simde_vfmaq_n_f32(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vfmaq_n_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - return vfmaq_n_f64(a, b, c); - #else - return simde_vfmaq_f64(a, b, simde_vdupq_n_f64(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vfmaq_n_f64 - #define vfmaq_n_f64(a, b, c) simde_vfmaq_n_f64(a, b, c) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_CMLA_H) */ -/* :: End simde/arm/neon/fma_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/fmlal.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_FMLAL_H) -#define SIMDE_ARM_NEON_FMLAL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfmlal_low_f16(simde_float32x2_t r, simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - return vfmlal_low_f16(r, a, b); - #else - simde_float32x2_private - ret_, - r_ = simde_float32x2_to_private(r); - simde_float16x4_private - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] + - simde_float16_to_float32(a_.values[i]) * simde_float16_to_float32(b_.values[i]); - } - return simde_float32x2_from_private(ret_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlal_low_f16 - #define vfmlal_low_f16(r, a, b) simde_vfmlal_low_f16((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmlalq_low_f16(simde_float32x4_t r, simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - return vfmlalq_low_f16(r, a, b); - #else - simde_float32x4_private - ret_, - r_ = simde_float32x4_to_private(r); - simde_float16x8_private - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] + - simde_float16_to_float32(a_.values[i]) * simde_float16_to_float32(b_.values[i]); - } - return simde_float32x4_from_private(ret_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlalq_low_f16 - #define vfmlalq_low_f16(r, a, b) simde_vfmlalq_low_f16((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfmlal_high_f16(simde_float32x2_t r, simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - return vfmlal_high_f16(r, a, b); - #else - simde_float32x2_private - ret_, - r_ = simde_float32x2_to_private(r); - simde_float16x4_private - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - size_t high_offset = sizeof(a_.values) / sizeof(a_.values[0]) / 2; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] + - simde_float16_to_float32(a_.values[i+high_offset]) * simde_float16_to_float32(b_.values[i+high_offset]); - } - return simde_float32x2_from_private(ret_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlal_high_f16 - #define vfmlal_high_f16(r, a, b) simde_vfmlal_high_f16((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmlalq_high_f16(simde_float32x4_t r, simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - return vfmlalq_high_f16(r, a, b); - #else - simde_float32x4_private - ret_, - r_ = simde_float32x4_to_private(r); - simde_float16x8_private - a_ = 
simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - size_t high_offset = sizeof(a_.values) / sizeof(a_.values[0]) / 2; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] + - simde_float16_to_float32(a_.values[i+high_offset]) * simde_float16_to_float32(b_.values[i+high_offset]); - } - return simde_float32x4_from_private(ret_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlalq_high_f16 - #define vfmlalq_high_f16(r, a, b) simde_vfmlalq_high_f16((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfmlal_lane_low_f16(simde_float32x2_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x2_private - ret_, - r_ = simde_float32x2_to_private(r); - simde_float16x4_private - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] + - simde_float16_to_float32(a_.values[i]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x2_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlal_lane_low_f16(r, a, b, lane) vfmlal_lane_low_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlal_lane_low_f16 - #define vfmlal_lane_low_f16(r, a, b, lane) simde_vfmlal_lane_low_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfmlal_laneq_low_f16(simde_float32x2_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float32x2_private - ret_, - r_ = simde_float32x2_to_private(r); - simde_float16x4_private - a_ = simde_float16x4_to_private(a); - simde_float16x8_private - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] + - simde_float16_to_float32(a_.values[i]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x2_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlal_laneq_low_f16(r, a, b, lane) vfmlal_laneq_low_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlal_laneq_low_f16 - #define vfmlal_laneq_low_f16(r, a, b, lane) simde_vfmlal_laneq_low_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmlalq_lane_low_f16(simde_float32x4_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_private - ret_, - r_ = simde_float32x4_to_private(r); - simde_float16x4_private - b_ = simde_float16x4_to_private(b); - simde_float16x8_private - a_ = simde_float16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] + - simde_float16_to_float32(a_.values[i]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x4_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define 
simde_vfmlalq_lane_low_f16(r, a, b, lane) vfmlalq_lane_low_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlalq_lane_low_f16 - #define vfmlalq_lane_low_f16(r, a, b, lane) simde_vfmlalq_lane_low_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmlalq_laneq_low_f16(simde_float32x4_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float32x4_private - ret_, - r_ = simde_float32x4_to_private(r); - simde_float16x8_private - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] + - simde_float16_to_float32(a_.values[i]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x4_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlalq_laneq_low_f16(r, a, b, lane) vfmlalq_laneq_low_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlalq_laneq_low_f16 - #define vfmlalq_laneq_low_f16(r, a, b, lane) simde_vfmlalq_laneq_low_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfmlal_lane_high_f16(simde_float32x2_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x2_private - ret_, - r_ = simde_float32x2_to_private(r); - simde_float16x4_private - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - size_t high_offset = sizeof(a_.values) / sizeof(a_.values[0]) / 2; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] + - simde_float16_to_float32(a_.values[i+high_offset]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x2_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlal_lane_high_f16(r, a, b, lane) vfmlal_lane_high_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlal_lane_high_f16 - #define vfmlal_lane_high_f16(r, a, b, lane) simde_vfmlal_lane_high_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfmlal_laneq_high_f16(simde_float32x2_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float32x2_private - ret_, - r_ = simde_float32x2_to_private(r); - simde_float16x4_private - a_ = simde_float16x4_to_private(a); - simde_float16x8_private - b_ = simde_float16x8_to_private(b); - size_t high_offset = sizeof(a_.values) / sizeof(a_.values[0]) / 2; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] + - simde_float16_to_float32(a_.values[i+high_offset]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x2_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlal_laneq_high_f16(r, a, b, lane) vfmlal_laneq_high_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlal_laneq_high_f16 
- #define vfmlal_laneq_high_f16(r, a, b, lane) simde_vfmlal_laneq_high_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmlalq_lane_high_f16(simde_float32x4_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_private - ret_, - r_ = simde_float32x4_to_private(r); - simde_float16x4_private - b_ = simde_float16x4_to_private(b); - simde_float16x8_private - a_ = simde_float16x8_to_private(a); - size_t high_offset = sizeof(a_.values) / sizeof(a_.values[0]) / 2; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] + - simde_float16_to_float32(a_.values[i+high_offset]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x4_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlalq_lane_high_f16(r, a, b, lane) vfmlalq_lane_high_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlalq_lane_high_f16 - #define vfmlalq_lane_high_f16(r, a, b, lane) simde_vfmlalq_lane_high_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmlalq_laneq_high_f16(simde_float32x4_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float32x4_private - ret_, - r_ = simde_float32x4_to_private(r); - simde_float16x8_private - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - size_t high_offset = sizeof(a_.values) / sizeof(a_.values[0]) / 2; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] + - simde_float16_to_float32(a_.values[i+high_offset]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x4_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlalq_laneq_high_f16(r, a, b, lane) vfmlalq_laneq_high_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlalq_laneq_high_f16 - #define vfmlalq_laneq_high_f16(r, a, b, lane) simde_vfmlalq_laneq_high_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vbfmlalbq_f32(simde_float32x4_t r, simde_bfloat16x8_t a, simde_bfloat16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vbfmlalbq_f32(r, a, b); - #else - simde_float32x4_private - ret, - r_ = simde_float32x4_to_private(r); - simde_bfloat16x8_private - a_ = simde_bfloat16x8_to_private(a), - b_ = simde_bfloat16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret.values) / sizeof(ret.values[0])) ; i++) { - ret.values[i] = r_.values[i] + - simde_bfloat16_to_float32(a_.values[i * 2]) * simde_bfloat16_to_float32(b_.values[i * 2]); - } - return simde_float32x4_from_private(ret); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbfmlalbq_f32 - #define vbfmlalbq_f32(r, a, b) simde_vbfmlalbq_f32((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vbfmlaltq_f32(simde_float32x4_t r, simde_bfloat16x8_t a, simde_bfloat16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vbfmlaltq_f32(r, a, b); - #else - 
simde_float32x4_private - ret, - r_ = simde_float32x4_to_private(r); - simde_bfloat16x8_private - a_ = simde_bfloat16x8_to_private(a), - b_ = simde_bfloat16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret.values) / sizeof(ret.values[0])) ; i++) { - ret.values[i] = r_.values[i] + - simde_bfloat16_to_float32(a_.values[i * 2 + 1]) * simde_bfloat16_to_float32(b_.values[i * 2 + 1]); - } - return simde_float32x4_from_private(ret); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbfmlaltq_f32 - #define vbfmlaltq_f32(r, a, b) simde_vbfmlaltq_f32((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vbfmlalbq_lane_f32(simde_float32x4_t r, simde_bfloat16x8_t a, simde_bfloat16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_private - ret, - r_ = simde_float32x4_to_private(r); - simde_bfloat16x8_private a_ = simde_bfloat16x8_to_private(a); - simde_bfloat16x4_private b_ = simde_bfloat16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret.values) / sizeof(ret.values[0])) ; i++) { - ret.values[i] = r_.values[i] + - simde_bfloat16_to_float32(a_.values[i * 2]) * simde_bfloat16_to_float32(b_.values[lane]); - } - return simde_float32x4_from_private(ret); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vbfmlalbq_lane_f32(r, a, b, lane) vbfmlalbq_lane_f32((r), (a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbfmlalbq_lane_f32 - #define vbfmlalbq_lane_f32(r, a, b, lane) simde_vbfmlalbq_lane_f32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vbfmlalbq_laneq_f32(simde_float32x4_t r, simde_bfloat16x8_t a, simde_bfloat16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float32x4_private - ret, - r_ = simde_float32x4_to_private(r); - simde_bfloat16x8_private - a_ = simde_bfloat16x8_to_private(a), - b_ = simde_bfloat16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret.values) / sizeof(ret.values[0])) ; i++) { - ret.values[i] = r_.values[i] + - simde_bfloat16_to_float32(a_.values[i * 2]) * simde_bfloat16_to_float32(b_.values[lane]); - } - return simde_float32x4_from_private(ret); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vbfmlalbq_laneq_f32(r, a, b, lane) vbfmlalbq_laneq_f32((r), (a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbfmlalbq_laneq_f32 - #define vbfmlalbq_laneq_f32(r, a, b, lane) simde_vbfmlalbq_laneq_f32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vbfmlaltq_lane_f32(simde_float32x4_t r, simde_bfloat16x8_t a, simde_bfloat16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_private - ret, - r_ = simde_float32x4_to_private(r); - simde_bfloat16x8_private a_ = simde_bfloat16x8_to_private(a); - simde_bfloat16x4_private b_ = simde_bfloat16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret.values) / sizeof(ret.values[0])) ; i++) { - ret.values[i] = r_.values[i] + - simde_bfloat16_to_float32(a_.values[i * 2 + 1]) * simde_bfloat16_to_float32(b_.values[lane]); - } - return simde_float32x4_from_private(ret); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vbfmlaltq_lane_f32(r, a, b, lane) vbfmlaltq_lane_f32((r), (a), (b), (lane)) -#endif -#if 
defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbfmlaltq_lane_f32 - #define vbfmlaltq_lane_f32(r, a, b, lane) simde_vbfmlaltq_lane_f32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vbfmlaltq_laneq_f32(simde_float32x4_t r, simde_bfloat16x8_t a, simde_bfloat16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float32x4_private - ret, - r_ = simde_float32x4_to_private(r); - simde_bfloat16x8_private - a_ = simde_bfloat16x8_to_private(a), - b_ = simde_bfloat16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret.values) / sizeof(ret.values[0])) ; i++) { - ret.values[i] = r_.values[i] + - simde_bfloat16_to_float32(a_.values[i * 2 + 1]) * simde_bfloat16_to_float32(b_.values[lane]); - } - return simde_float32x4_from_private(ret); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vbfmlaltq_laneq_f32(r, a, b, lane) vbfmlaltq_laneq_f32((r), (a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vbfmlaltq_laneq_f32 - #define vbfmlaltq_laneq_f32(r, a, b, lane) simde_vbfmlaltq_laneq_f32((r), (a), (b), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_FMLAL_H) */ -/* :: End simde/arm/neon/fmlal.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/fmlsl.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_FMLSL_H) -#define SIMDE_ARM_NEON_FMLSL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfmlsl_low_f16(simde_float32x2_t r, simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - return vfmlsl_low_f16(r, a, b); - #else - simde_float32x2_private - ret_, - r_ = simde_float32x2_to_private(r); - simde_float16x4_private - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] - - simde_float16_to_float32(a_.values[i]) * simde_float16_to_float32(b_.values[i]); - } - return simde_float32x2_from_private(ret_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlsl_low_f16 - #define vfmlsl_low_f16(r, a, b) simde_vfmlsl_low_f16((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmlslq_low_f16(simde_float32x4_t r, simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - return vfmlslq_low_f16(r, a, b); - #else - simde_float32x4_private - ret_, - r_ = simde_float32x4_to_private(r); - simde_float16x8_private - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] - - simde_float16_to_float32(a_.values[i]) * simde_float16_to_float32(b_.values[i]); - } - return simde_float32x4_from_private(ret_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlslq_low_f16 - #define vfmlslq_low_f16(r, a, b) simde_vfmlslq_low_f16((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfmlsl_high_f16(simde_float32x2_t r, simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - return vfmlsl_high_f16(r, a, b); - #else - simde_float32x2_private - ret_, - r_ = simde_float32x2_to_private(r); - simde_float16x4_private - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - size_t high_offset = sizeof(a_.values) / sizeof(a_.values[0]) / 2; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] - - simde_float16_to_float32(a_.values[i+high_offset]) * simde_float16_to_float32(b_.values[i+high_offset]); - } - return simde_float32x2_from_private(ret_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlsl_high_f16 - #define vfmlsl_high_f16(r, a, b) simde_vfmlsl_high_f16((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmlslq_high_f16(simde_float32x4_t r, simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - return vfmlslq_high_f16(r, a, b); - #else - simde_float32x4_private - ret_, - r_ = simde_float32x4_to_private(r); - simde_float16x8_private - a_ = 
simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - size_t high_offset = sizeof(a_.values) / sizeof(a_.values[0]) / 2; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] - - simde_float16_to_float32(a_.values[i+high_offset]) * simde_float16_to_float32(b_.values[i+high_offset]); - } - return simde_float32x4_from_private(ret_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlslq_high_f16 - #define vfmlslq_high_f16(r, a, b) simde_vfmlslq_high_f16((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfmlsl_lane_low_f16(simde_float32x2_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x2_private - ret_, - r_ = simde_float32x2_to_private(r); - simde_float16x4_private - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] - - simde_float16_to_float32(a_.values[i]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x2_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlsl_lane_low_f16(r, a, b, lane) vfmlsl_lane_low_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlsl_lane_low_f16 - #define vfmlsl_lane_low_f16(r, a, b, lane) simde_vfmlsl_lane_low_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfmlsl_laneq_low_f16(simde_float32x2_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float32x2_private - ret_, - r_ = simde_float32x2_to_private(r); - simde_float16x4_private - a_ = simde_float16x4_to_private(a); - simde_float16x8_private - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] - - simde_float16_to_float32(a_.values[i]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x2_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlsl_laneq_low_f16(r, a, b, lane) vfmlsl_laneq_low_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlsl_laneq_low_f16 - #define vfmlsl_laneq_low_f16(r, a, b, lane) simde_vfmlsl_laneq_low_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmlslq_lane_low_f16(simde_float32x4_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_private - ret_, - r_ = simde_float32x4_to_private(r); - simde_float16x4_private - b_ = simde_float16x4_to_private(b); - simde_float16x8_private - a_ = simde_float16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] - - simde_float16_to_float32(a_.values[i]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x4_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define 
simde_vfmlslq_lane_low_f16(r, a, b, lane) vfmlslq_lane_low_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlslq_lane_low_f16 - #define vfmlslq_lane_low_f16(r, a, b, lane) simde_vfmlslq_lane_low_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmlslq_laneq_low_f16(simde_float32x4_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float32x4_private - ret_, - r_ = simde_float32x4_to_private(r); - simde_float16x8_private - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] - - simde_float16_to_float32(a_.values[i]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x4_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlslq_laneq_low_f16(r, a, b, lane) vfmlslq_laneq_low_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlslq_laneq_low_f16 - #define vfmlslq_laneq_low_f16(r, a, b, lane) simde_vfmlslq_laneq_low_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfmlsl_lane_high_f16(simde_float32x2_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x2_private - ret_, - r_ = simde_float32x2_to_private(r); - simde_float16x4_private - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - size_t high_offset = sizeof(a_.values) / sizeof(a_.values[0]) / 2; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] - - simde_float16_to_float32(a_.values[i+high_offset]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x2_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlsl_lane_high_f16(r, a, b, lane) vfmlsl_lane_high_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlsl_lane_high_f16 - #define vfmlsl_lane_high_f16(r, a, b, lane) simde_vfmlsl_lane_high_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vfmlsl_laneq_high_f16(simde_float32x2_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float32x2_private - ret_, - r_ = simde_float32x2_to_private(r); - simde_float16x4_private - a_ = simde_float16x4_to_private(a); - simde_float16x8_private - b_ = simde_float16x8_to_private(b); - size_t high_offset = sizeof(a_.values) / sizeof(a_.values[0]) / 2; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] - - simde_float16_to_float32(a_.values[i+high_offset]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x2_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlsl_laneq_high_f16(r, a, b, lane) vfmlsl_laneq_high_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlsl_laneq_high_f16 
- #define vfmlsl_laneq_high_f16(r, a, b, lane) simde_vfmlsl_laneq_high_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmlslq_lane_high_f16(simde_float32x4_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_private - ret_, - r_ = simde_float32x4_to_private(r); - simde_float16x4_private - b_ = simde_float16x4_to_private(b); - simde_float16x8_private - a_ = simde_float16x8_to_private(a); - size_t high_offset = sizeof(a_.values) / sizeof(a_.values[0]) / 2; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] - - simde_float16_to_float32(a_.values[i+high_offset]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x4_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlslq_lane_high_f16(r, a, b, lane) vfmlslq_lane_high_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlslq_lane_high_f16 - #define vfmlslq_lane_high_f16(r, a, b, lane) simde_vfmlslq_lane_high_f16((r), (a), (b), (lane)); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vfmlslq_laneq_high_f16(simde_float32x4_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float32x4_private - ret_, - r_ = simde_float32x4_to_private(r); - simde_float16x8_private - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - size_t high_offset = sizeof(a_.values) / sizeof(a_.values[0]) / 2; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(ret_.values) / sizeof(ret_.values[0])) ; i++) { - ret_.values[i] = r_.values[i] - - simde_float16_to_float32(a_.values[i+high_offset]) * simde_float16_to_float32(b_.values[lane]); - } - return simde_float32x4_from_private(ret_); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - defined(__ARM_FEATURE_FP16_FML) - #define simde_vfmlslq_laneq_high_f16(r, a, b, lane) vfmlslq_laneq_high_f16((r), (a), (b), (lane)); -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vfmlslq_laneq_high_f16 - #define vfmlslq_laneq_high_f16(r, a, b, lane) simde_vfmlslq_laneq_high_f16((r), (a), (b), (lane)); -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_FMLSL_H) */ -/* :: End simde/arm/neon/fmlsl.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/fms.h :: */ -/* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, copy, -* modify, merge, publish, distribute, sublicense, and/or sell copies -* of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -* -* Copyright: -* 2023 Yi-Yen Chung (Copyright owned by Andes Technology) -*/ - -#if !defined(SIMDE_ARM_NEON_FMS_H) -#define SIMDE_ARM_NEON_FMS_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/neg.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_NEG_H) -#define SIMDE_ARM_NEON_NEG_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vnegd_s64(int64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) - return vnegd_s64(a); - #else - return -a; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vnegd_s64 - #define vnegd_s64(a) simde_vnegd_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vnegh_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vnegh_f16(a); - #else - return simde_float16_from_float32(-simde_float16_to_float32(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vnegh_f16 - #define vnegh_f16(a) simde_vnegh_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vneg_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vneg_f16(a); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vnegh_f16(a_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vneg_f16 - #define vneg_f16(a) simde_vneg_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vneg_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vneg_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = -a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = -(a_.values[i]); - } - #endif - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vneg_f32 - #define vneg_f32(a) simde_vneg_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vneg_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vneg_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = -a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = -(a_.values[i]); - } - #endif - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vneg_f64 - #define vneg_f64(a) simde_vneg_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vneg_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vneg_s8(a); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = -a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = -(a_.values[i]); - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vneg_s8 - #define vneg_s8(a) 
simde_vneg_s8(a)
-#endif
-
-#endif /* !defined(SIMDE_ARM_NEON_NEG_H) */
-/* :: End simde/arm/neon/neg.h :: */
-#endif /* !defined(SIMDE_ARM_NEON_FMS_H) */
-/* :: End simde/arm/neon/fms.h :: */
-/* :: Begin simde/arm/neon/fms_lane.h :: */
-/* :: End simde/arm/neon/fms_lane.h :: */
-/* :: Begin simde/arm/neon/fms_n.h :: */
-/* :: End simde/arm/neon/fms_n.h :: */
-/* :: Begin simde/arm/neon/hadd.h :: */
-#endif /* !defined(SIMDE_ARM_NEON_HADD_H) */
-/* :: End simde/arm/neon/hadd.h :: */
-/* :: Begin simde/arm/neon/hsub.h :: */
-#endif /* !defined(SIMDE_ARM_NEON_HSUB_H) */
-/* :: End simde/arm/neon/hsub.h :: */
-/* :: Begin simde/arm/neon/ld1.h :: */
-#endif /* !defined(SIMDE_ARM_NEON_LD1_H) */
-/* :: End simde/arm/neon/ld1.h :: */
-/* :: Begin simde/arm/neon/ld1_dup.h :: */
- * - * Copyright: - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2021 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_LD1_DUP_H) -#define SIMDE_ARM_NEON_LD1_DUP_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vld1_dup_f16(simde_float16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vld1_dup_f16(ptr); - #else - return simde_vdup_n_f16(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_f16 - #define vld1_dup_f16(a) simde_vld1_dup_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vld1_dup_f32(simde_float32 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1_dup_f32(ptr); - #else - return simde_vdup_n_f32(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_f32 - #define vld1_dup_f32(a) simde_vld1_dup_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vld1_dup_f64(simde_float64 const * ptr) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld1_dup_f64(ptr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde_vreinterpret_f64_s64(vld1_dup_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, ptr))); - #else - return simde_vdup_n_f64(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_f64 - #define vld1_dup_f64(a) simde_vld1_dup_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vld1_dup_s8(int8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1_dup_s8(ptr); - #else - return simde_vdup_n_s8(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_s8 - #define vld1_dup_s8(a) simde_vld1_dup_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vld1_dup_s16(int16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1_dup_s16(ptr); - #else - return simde_vdup_n_s16(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_s16 - #define vld1_dup_s16(a) simde_vld1_dup_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vld1_dup_s32(int32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1_dup_s32(ptr); - #else - return simde_vdup_n_s32(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_s32 - #define vld1_dup_s32(a) simde_vld1_dup_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vld1_dup_s64(int64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1_dup_s64(ptr); - #else - return simde_vdup_n_s64(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_s64 - #define vld1_dup_s64(a) simde_vld1_dup_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vld1_dup_u8(uint8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1_dup_u8(ptr); - #else - return simde_vdup_n_u8(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_u8 - #define vld1_dup_u8(a) simde_vld1_dup_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t 
-simde_vld1_dup_u16(uint16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1_dup_u16(ptr); - #else - return simde_vdup_n_u16(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_u16 - #define vld1_dup_u16(a) simde_vld1_dup_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vld1_dup_u32(uint32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1_dup_u32(ptr); - #else - return simde_vdup_n_u32(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_u32 - #define vld1_dup_u32(a) simde_vld1_dup_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vld1_dup_u64(uint64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1_dup_u64(ptr); - #else - return simde_vdup_n_u64(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_u64 - #define vld1_dup_u64(a) simde_vld1_dup_u64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vld1q_dup_f16(simde_float16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vld1q_dup_f16(ptr); - #else - return simde_vdupq_n_f16(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_f16 - #define vld1q_dup_f16(a) simde_vld1q_dup_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vld1q_dup_f32(simde_float32 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_dup_f32(ptr); - #elif \ - defined(SIMDE_X86_SSE_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) - simde_float32x4_private r_; - - #if defined(SIMDE_X86_SSE_NATIVE) - r_.m128 = _mm_load_ps1(ptr); - #else - r_.v128 = wasm_v128_load32_splat(ptr); - #endif - - return simde_float32x4_from_private(r_); - #else - return simde_vdupq_n_f32(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_f32 - #define vld1q_dup_f32(a) simde_vld1q_dup_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vld1q_dup_f64(simde_float64 const * ptr) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld1q_dup_f64(ptr); - #else - return simde_vdupq_n_f64(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_f64 - #define vld1q_dup_f64(a) simde_vld1q_dup_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vld1q_dup_s8(int8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_dup_s8(ptr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde_int8x16_private r_; - - r_.v128 = wasm_v128_load8_splat(ptr); - - return simde_int8x16_from_private(r_); - #else - return simde_vdupq_n_s8(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_s8 - #define vld1q_dup_s8(a) simde_vld1q_dup_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vld1q_dup_s16(int16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_dup_s16(ptr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde_int16x8_private r_; - - r_.v128 = wasm_v128_load16_splat(ptr); - - return simde_int16x8_from_private(r_); - #else - return simde_vdupq_n_s16(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_s16 - #define vld1q_dup_s16(a) simde_vld1q_dup_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vld1q_dup_s32(int32_t const * ptr) { - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_dup_s32(ptr); - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) - simde_int32x4_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_castps_si128(_mm_load_ps1(HEDLEY_REINTERPRET_CAST(float const *, ptr))); - #else - r_.v128 = wasm_v128_load32_splat(ptr); - #endif - - return simde_int32x4_from_private(r_); - #else - return simde_vdupq_n_s32(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_s32 - #define vld1q_dup_s32(a) simde_vld1q_dup_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vld1q_dup_s64(int64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_dup_s64(ptr); - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) - simde_int64x2_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_set1_epi64x(*ptr); - #else - r_.v128 = wasm_v128_load64_splat(ptr); - #endif - - return simde_int64x2_from_private(r_); - #else - return simde_vdupq_n_s64(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_s64 - #define vld1q_dup_s64(a) simde_vld1q_dup_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vld1q_dup_u8(uint8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_dup_u8(ptr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde_uint8x16_private r_; - - r_.v128 = wasm_v128_load8_splat(ptr); - - return simde_uint8x16_from_private(r_); - #else - return simde_vdupq_n_u8(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_u8 - #define vld1q_dup_u8(a) simde_vld1q_dup_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vld1q_dup_u16(uint16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_dup_u16(ptr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde_uint16x8_private r_; - - r_.v128 = wasm_v128_load16_splat(ptr); - - return simde_uint16x8_from_private(r_); - #else - return simde_vdupq_n_u16(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_u16 - #define vld1q_dup_u16(a) simde_vld1q_dup_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vld1q_dup_u32(uint32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_dup_u32(ptr); - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) - simde_uint32x4_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_castps_si128(_mm_load_ps1(HEDLEY_REINTERPRET_CAST(float const *, ptr))); - #else - r_.v128 = wasm_v128_load32_splat(ptr); - #endif - - return simde_uint32x4_from_private(r_); - #else - return simde_vdupq_n_u32(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_u32 - #define vld1q_dup_u32(a) simde_vld1q_dup_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vld1q_dup_u64(uint64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_dup_u64(ptr); - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) - simde_uint64x2_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_set1_epi64x(*HEDLEY_REINTERPRET_CAST(int64_t const *, ptr)); - #else - r_.v128 = wasm_v128_load64_splat(ptr); - #endif - - return simde_uint64x2_from_private(r_); - #else - return simde_vdupq_n_u64(*ptr); - #endif -} -#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_u64 - #define vld1q_dup_u64(a) simde_vld1q_dup_u64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vld1_dup_p8(simde_poly8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1_dup_p8(ptr); - #else - return simde_vdup_n_p8(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_p8 - #define vld1_dup_p8(a) simde_vld1_dup_p8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vld1_dup_p16(simde_poly16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1_dup_p16(ptr); - #else - return simde_vdup_n_p16(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_p16 - #define vld1_dup_p16(a) simde_vld1_dup_p16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t -simde_vld1_dup_p64(simde_poly64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld1_dup_p64(ptr); - #else - return simde_vdup_n_p64(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_p64 - #define vld1_dup_p64(a) simde_vld1_dup_p64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vld1q_dup_p8(simde_poly8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_dup_p8(ptr); - #else - return simde_vdupq_n_p8(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_p8 - #define vld1q_dup_p8(a) simde_vld1q_dup_p8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vld1q_dup_p16(simde_poly16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_dup_p16(ptr); - #else - return simde_vdupq_n_p16(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_p16 - #define vld1q_dup_p16(a) simde_vld1q_dup_p16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vld1q_dup_p64(simde_poly64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld1q_dup_p64(ptr); - #else - return simde_vdupq_n_p64(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_p64 - #define vld1q_dup_p64(a) simde_vld1q_dup_p64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x4_t -simde_vld1_dup_bf16(simde_bfloat16 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld1_dup_bf16(ptr); - #else - return simde_vdup_n_bf16(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1_dup_bf16 - #define vld1_dup_bf16(a) simde_vld1_dup_bf16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8_t -simde_vld1q_dup_bf16(simde_bfloat16 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld1q_dup_bf16(ptr); - #else - return simde_vdupq_n_bf16(*ptr); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_dup_bf16 - #define vld1q_dup_bf16(a) simde_vld1q_dup_bf16((a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_LD1_DUP_H) */ -/* :: End simde/arm/neon/ld1_dup.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ld1_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the 
"Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_LD1_LANE_H) -#define SIMDE_ARM_NEON_LD1_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t simde_vld1_lane_s8(int8_t const *ptr, simde_int8x8_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_int8x8_private r = simde_int8x8_to_private(src); - r.values[lane] = *ptr; - return simde_int8x8_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1_lane_s8(ptr, src, lane) vld1_lane_s8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_s8 - #define vld1_lane_s8(ptr, src, lane) simde_vld1_lane_s8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t simde_vld1_lane_s16(int16_t const *ptr, simde_int16x4_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int16x4_private r = simde_int16x4_to_private(src); - r.values[lane] = *ptr; - return simde_int16x4_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1_lane_s16(ptr, src, lane) vld1_lane_s16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_s16 - #define vld1_lane_s16(ptr, src, lane) simde_vld1_lane_s16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t simde_vld1_lane_s32(int32_t const *ptr, simde_int32x2_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int32x2_private r = simde_int32x2_to_private(src); - r.values[lane] = *ptr; - return simde_int32x2_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1_lane_s32(ptr, src, lane) vld1_lane_s32(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_s32 - #define vld1_lane_s32(ptr, src, lane) simde_vld1_lane_s32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t simde_vld1_lane_s64(int64_t const *ptr, simde_int64x1_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_int64x1_private r = simde_int64x1_to_private(src); - r.values[lane] = *ptr; - return simde_int64x1_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1_lane_s64(ptr, src, lane) vld1_lane_s64(ptr, src, lane) -#endif -#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_s64 - #define vld1_lane_s64(ptr, src, lane) simde_vld1_lane_s64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t simde_vld1_lane_u8(uint8_t const *ptr, simde_uint8x8_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_uint8x8_private r = simde_uint8x8_to_private(src); - r.values[lane] = *ptr; - return simde_uint8x8_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1_lane_u8(ptr, src, lane) vld1_lane_u8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_u8 - #define vld1_lane_u8(ptr, src, lane) simde_vld1_lane_u8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t simde_vld1_lane_u16(uint16_t const *ptr, simde_uint16x4_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_uint16x4_private r = simde_uint16x4_to_private(src); - r.values[lane] = *ptr; - return simde_uint16x4_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1_lane_u16(ptr, src, lane) vld1_lane_u16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_u16 - #define vld1_lane_u16(ptr, src, lane) simde_vld1_lane_u16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t simde_vld1_lane_u32(uint32_t const *ptr, simde_uint32x2_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_uint32x2_private r = simde_uint32x2_to_private(src); - r.values[lane] = *ptr; - return simde_uint32x2_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1_lane_u32(ptr, src, lane) vld1_lane_u32(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_u32 - #define vld1_lane_u32(ptr, src, lane) simde_vld1_lane_u32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t simde_vld1_lane_u64(uint64_t const *ptr, simde_uint64x1_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_uint64x1_private r = simde_uint64x1_to_private(src); - r.values[lane] = *ptr; - return simde_uint64x1_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1_lane_u64(ptr, src, lane) vld1_lane_u64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_u64 - #define vld1_lane_u64(ptr, src, lane) simde_vld1_lane_u64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vld1_lane_f16(simde_float16_t const *ptr, simde_float16x4_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x4_private r = simde_float16x4_to_private(src); - r.values[lane] = *ptr; - return simde_float16x4_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vld1_lane_f16(ptr, src, lane) vld1_lane_f16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_f16 - #define vld1_lane_f16(ptr, src, lane) simde_vld1_lane_f16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vld1_lane_f32(simde_float32_t const *ptr, simde_float32x2_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2_private r = simde_float32x2_to_private(src); - r.values[lane] = *ptr; - return simde_float32x2_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - 
#define simde_vld1_lane_f32(ptr, src, lane) vld1_lane_f32(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_f32 - #define vld1_lane_f32(ptr, src, lane) simde_vld1_lane_f32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t simde_vld1_lane_f64(simde_float64_t const *ptr, simde_float64x1_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float64x1_private r = simde_float64x1_to_private(src); - r.values[lane] = *ptr; - return simde_float64x1_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld1_lane_f64(ptr, src, lane) vld1_lane_f64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_f64 - #define vld1_lane_f64(ptr, src, lane) simde_vld1_lane_f64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t simde_vld1q_lane_s8(int8_t const *ptr, simde_int8x16_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - simde_int8x16_private r = simde_int8x16_to_private(src); - r.values[lane] = *ptr; - return simde_int8x16_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1q_lane_s8(ptr, src, lane) vld1q_lane_s8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_s8 - #define vld1q_lane_s8(ptr, src, lane) simde_vld1q_lane_s8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t simde_vld1q_lane_s16(int16_t const *ptr, simde_int16x8_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_int16x8_private r = simde_int16x8_to_private(src); - r.values[lane] = *ptr; - return simde_int16x8_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1q_lane_s16(ptr, src, lane) vld1q_lane_s16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_s16 - #define vld1q_lane_s16(ptr, src, lane) simde_vld1q_lane_s16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t simde_vld1q_lane_s32(int32_t const *ptr, simde_int32x4_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int32x4_private r = simde_int32x4_to_private(src); - r.values[lane] = *ptr; - return simde_int32x4_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1q_lane_s32(ptr, src, lane) vld1q_lane_s32(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_s32 - #define vld1q_lane_s32(ptr, src, lane) simde_vld1q_lane_s32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t simde_vld1q_lane_s64(int64_t const *ptr, simde_int64x2_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int64x2_private r = simde_int64x2_to_private(src); - r.values[lane] = *ptr; - return simde_int64x2_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1q_lane_s64(ptr, src, lane) vld1q_lane_s64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_s64 - #define vld1q_lane_s64(ptr, src, lane) simde_vld1q_lane_s64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t simde_vld1q_lane_u8(uint8_t const *ptr, simde_uint8x16_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - simde_uint8x16_private r = simde_uint8x16_to_private(src); - r.values[lane] = *ptr; - return simde_uint8x16_from_private(r); 
-} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1q_lane_u8(ptr, src, lane) vld1q_lane_u8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_u8 - #define vld1q_lane_u8(ptr, src, lane) simde_vld1q_lane_u8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t simde_vld1q_lane_u16(uint16_t const *ptr, simde_uint16x8_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_uint16x8_private r = simde_uint16x8_to_private(src); - r.values[lane] = *ptr; - return simde_uint16x8_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1q_lane_u16(ptr, src, lane) vld1q_lane_u16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_u16 - #define vld1q_lane_u16(ptr, src, lane) simde_vld1q_lane_u16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t simde_vld1q_lane_u32(uint32_t const *ptr, simde_uint32x4_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_uint32x4_private r = simde_uint32x4_to_private(src); - r.values[lane] = *ptr; - return simde_uint32x4_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1q_lane_u32(ptr, src, lane) vld1q_lane_u32(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_u32 - #define vld1q_lane_u32(ptr, src, lane) simde_vld1q_lane_u32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t simde_vld1q_lane_u64(uint64_t const *ptr, simde_uint64x2_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_uint64x2_private r = simde_uint64x2_to_private(src); - r.values[lane] = *ptr; - return simde_uint64x2_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1q_lane_u64(ptr, src, lane) vld1q_lane_u64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_u64 - #define vld1q_lane_u64(ptr, src, lane) simde_vld1q_lane_u64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vld1q_lane_f16(simde_float16_t const *ptr, simde_float16x8_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float16x8_private r = simde_float16x8_to_private(src); - r.values[lane] = *ptr; - return simde_float16x8_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vld1q_lane_f16(ptr, src, lane) vld1q_lane_f16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_f16 - #define vld1q_lane_f16(ptr, src, lane) simde_vld1q_lane_f16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vld1q_lane_f32(simde_float32_t const *ptr, simde_float32x4_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_private r = simde_float32x4_to_private(src); - r.values[lane] = *ptr; - return simde_float32x4_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1q_lane_f32(ptr, src, lane) vld1q_lane_f32(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_f32 - #define vld1q_lane_f32(ptr, src, lane) simde_vld1q_lane_f32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t simde_vld1q_lane_f64(simde_float64_t const *ptr, simde_float64x2_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 
0, 1) { - simde_float64x2_private r = simde_float64x2_to_private(src); - r.values[lane] = *ptr; - return simde_float64x2_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld1q_lane_f64(ptr, src, lane) vld1q_lane_f64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_f64 - #define vld1q_lane_f64(ptr, src, lane) simde_vld1q_lane_f64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vld1_lane_p8(simde_poly8_t const *ptr, simde_poly8x8_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_poly8x8_private r = simde_poly8x8_to_private(src); - r.values[lane] = *ptr; - return simde_poly8x8_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1_lane_p8(ptr, src, lane) vld1_lane_p8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_p8 - #define vld1_lane_p8(ptr, src, lane) simde_vld1_lane_p8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vld1_lane_p16(simde_poly16_t const *ptr, simde_poly16x4_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_poly16x4_private r = simde_poly16x4_to_private(src); - r.values[lane] = *ptr; - return simde_poly16x4_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1_lane_p16(ptr, src, lane) vld1_lane_p16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_p16 - #define vld1_lane_p16(ptr, src, lane) simde_vld1_lane_p16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1_t -simde_vld1_lane_p64(simde_poly64_t const *ptr, simde_poly64x1_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_poly64x1_private r = simde_poly64x1_to_private(src); - r.values[lane] = *ptr; - return simde_poly64x1_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define simde_vld1_lane_p64(ptr, src, lane) vld1_lane_p64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_p64 - #define vld1_lane_p64(ptr, src, lane) simde_vld1_lane_p64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vld1q_lane_p8(simde_poly8_t const *ptr, simde_poly8x16_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - simde_poly8x16_private r = simde_poly8x16_to_private(src); - r.values[lane] = *ptr; - return simde_poly8x16_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1q_lane_p8(ptr, src, lane) vld1q_lane_p8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_p8 - #define vld1q_lane_p8(ptr, src, lane) simde_vld1q_lane_p8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vld1q_lane_p16(simde_poly16_t const *ptr, simde_poly16x8_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_poly16x8_private r = simde_poly16x8_to_private(src); - r.values[lane] = *ptr; - return simde_poly16x8_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld1q_lane_p16(ptr, src, lane) vld1q_lane_p16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_p16 - #define vld1q_lane_p16(ptr, src, lane) simde_vld1q_lane_p16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2_t -simde_vld1q_lane_p64(simde_poly64_t 
const *ptr, simde_poly64x2_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_poly64x2_private r = simde_poly64x2_to_private(src); - r.values[lane] = *ptr; - return simde_poly64x2_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define simde_vld1q_lane_p64(ptr, src, lane) vld1q_lane_p64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_p64 - #define vld1q_lane_p64(ptr, src, lane) simde_vld1q_lane_p64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x4_t simde_vld1_lane_bf16(simde_bfloat16_t const *ptr, simde_bfloat16x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_bfloat16x4_private r = simde_bfloat16x4_to_private(src); - r.values[lane] = *ptr; - return simde_bfloat16x4_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vld1_lane_bf16(ptr, src, lane) vld1_lane_bf16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1_lane_bf16 - #define vld1_lane_bf16(ptr, src, lane) simde_vld1_lane_bf16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8_t simde_vld1q_lane_bf16(simde_bfloat16_t const *ptr, simde_bfloat16x8_t src, - const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_bfloat16x8_private r = simde_bfloat16x8_to_private(src); - r.values[lane] = *ptr; - return simde_bfloat16x8_from_private(r); -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vld1q_lane_bf16(ptr, src, lane) vld1q_lane_bf16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_lane_bf16 - #define vld1q_lane_bf16(ptr, src, lane) simde_vld1q_lane_bf16((ptr), (src), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_LD1_LANE_H) */ -/* :: End simde/arm/neon/ld1_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ld1_x2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2021 Décio Luiz Gazzoni Filho - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_LD1_X2_H) -#define SIMDE_ARM_NEON_LD1_X2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4x2_t -simde_vld1_f16_x2(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_f16_x2(ptr); - #else - simde_float16x4_private a_[2]; - for (size_t i = 0; i < 8; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_float16x4x2_t s_ = { { simde_float16x4_from_private(a_[0]), - simde_float16x4_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_f16_x2 - #define vld1_f16_x2(a) simde_vld1_f16_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2x2_t -simde_vld1_f32_x2(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_f32_x2(ptr); - #else - simde_float32x2_private a_[2]; - for (size_t i = 0; i < 4; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_float32x2x2_t s_ = { { simde_float32x2_from_private(a_[0]), - simde_float32x2_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_f32_x2 - #define vld1_f32_x2(a) simde_vld1_f32_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1x2_t -simde_vld1_f64_x2(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(2)]) { - #if \ - defined(SIMDE_ARM_NEON_A64V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - return vld1_f64_x2(ptr); - #else - simde_float64x1_private a_[2]; - for (size_t i = 0; i < 2; i++) { - a_[i].values[0] = ptr[i]; - } - simde_float64x1x2_t s_ = { { simde_float64x1_from_private(a_[0]), - simde_float64x1_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld1_f64_x2 - #define vld1_f64_x2(a) simde_vld1_f64_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8x2_t -simde_vld1_s8_x2(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_s8_x2(ptr); - #else - simde_int8x8_private a_[2]; - for (size_t i = 0; i < 16; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_int8x8x2_t s_ = { { simde_int8x8_from_private(a_[0]), - 
simde_int8x8_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_s8_x2 - #define vld1_s8_x2(a) simde_vld1_s8_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4x2_t -simde_vld1_s16_x2(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_s16_x2(ptr); - #else - simde_int16x4_private a_[2]; - for (size_t i = 0; i < 8; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_int16x4x2_t s_ = { { simde_int16x4_from_private(a_[0]), - simde_int16x4_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_s16_x2 - #define vld1_s16_x2(a) simde_vld1_s16_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2x2_t -simde_vld1_s32_x2(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_s32_x2(ptr); - #else - simde_int32x2_private a_[2]; - for (size_t i = 0; i < 4; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_int32x2x2_t s_ = { { simde_int32x2_from_private(a_[0]), - simde_int32x2_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_s32_x2 - #define vld1_s32_x2(a) simde_vld1_s32_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1x2_t -simde_vld1_s64_x2(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_s64_x2(ptr); - #else - simde_int64x1_private a_[2]; - for (size_t i = 0; i < 2; i++) { - a_[i].values[0] = ptr[i]; - } - simde_int64x1x2_t s_ = { { simde_int64x1_from_private(a_[0]), - simde_int64x1_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_s64_x2 - #define vld1_s64_x2(a) simde_vld1_s64_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8x2_t -simde_vld1_u8_x2(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_u8_x2(ptr); - #else - simde_uint8x8_private a_[2]; - for (size_t i = 0; i < 16; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_uint8x8x2_t s_ = { { simde_uint8x8_from_private(a_[0]), - simde_uint8x8_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_u8_x2 - #define vld1_u8_x2(a) simde_vld1_u8_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4x2_t -simde_vld1_u16_x2(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || 
(HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_u16_x2(ptr); - #else - simde_uint16x4_private a_[2]; - for (size_t i = 0; i < 8; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_uint16x4x2_t s_ = { { simde_uint16x4_from_private(a_[0]), - simde_uint16x4_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_u16_x2 - #define vld1_u16_x2(a) simde_vld1_u16_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2x2_t -simde_vld1_u32_x2(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_u32_x2(ptr); - #else - simde_uint32x2_private a_[2]; - for (size_t i = 0; i < 4; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_uint32x2x2_t s_ = { { simde_uint32x2_from_private(a_[0]), - simde_uint32x2_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_u32_x2 - #define vld1_u32_x2(a) simde_vld1_u32_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1x2_t -simde_vld1_u64_x2(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_u64_x2(ptr); - #else - simde_uint64x1_private a_[2]; - for (size_t i = 0; i < 2; i++) { - a_[i].values[0] = ptr[i]; - } - simde_uint64x1x2_t s_ = { { simde_uint64x1_from_private(a_[0]), - simde_uint64x1_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_u64_x2 - #define vld1_u64_x2(a) simde_vld1_u64_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8x2_t -simde_vld1_p8_x2(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - return vld1_p8_x2(ptr); - #else - simde_poly8x8_private a_[2]; - for (size_t i = 0; i < 16; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_poly8x8x2_t s_ = { { simde_poly8x8_from_private(a_[0]), - simde_poly8x8_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_p8_x2 - #define vld1_p8_x2(a) simde_vld1_p8_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4x2_t -simde_vld1_p16_x2(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - return vld1_p16_x2(ptr); - #else - simde_poly16x4_private a_[2]; - for (size_t i = 0; i < 8; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_poly16x4x2_t s_ = { { simde_poly16x4_from_private(a_[0]), - simde_poly16x4_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_p16_x2 - #define vld1_p16_x2(a) simde_vld1_p16_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1x2_t -simde_vld1_p64_x2(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V8_NATIVE) && 
\ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_p64_x2(ptr); - #else - simde_poly64x1_private a_[2]; - for (size_t i = 0; i < 2; i++) { - a_[i].values[0] = ptr[i]; - } - simde_poly64x1x2_t s_ = { { simde_poly64x1_from_private(a_[0]), - simde_poly64x1_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1_p64_x2 - #define vld1_p64_x2(a) simde_vld1_p64_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x4x2_t -simde_vld1_bf16_x2(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld1_bf16_x2(ptr); - #else - simde_bfloat16x4_private a_[2]; - for (size_t i = 0; i < 8; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_bfloat16x4x2_t s_ = { { simde_bfloat16x4_from_private(a_[0]), - simde_bfloat16x4_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1_bf16_x2 - #define vld1_bf16_x2(a) simde_vld1_bf16_x2((a)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_LD1_X2_H) */ -/* :: End simde/arm/neon/ld1_x2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ld1_x3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_LD1_X3_H) -#define SIMDE_ARM_NEON_LD1_X3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4x3_t -simde_vld1_f16_x3(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(12)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_f16_x3(ptr); - #else - simde_float16x4_private a_[3]; - for (size_t i = 0; i < 12; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_float16x4x3_t s_ = { { simde_float16x4_from_private(a_[0]), - simde_float16x4_from_private(a_[1]), - simde_float16x4_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_f16_x3 - #define vld1_f16_x3(a) simde_vld1_f16_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2x3_t -simde_vld1_f32_x3(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(6)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_f32_x3(ptr); - #else - simde_float32x2_private a_[3]; - for (size_t i = 0; i < 6; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_float32x2x3_t s_ = { { simde_float32x2_from_private(a_[0]), - simde_float32x2_from_private(a_[1]), - simde_float32x2_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_f32_x3 - #define vld1_f32_x3(a) simde_vld1_f32_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1x3_t -simde_vld1_f64_x3(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(3)]) { - #if \ - defined(SIMDE_ARM_NEON_A64V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - return vld1_f64_x3(ptr); - #else - simde_float64x1_private a_[3]; - for (size_t i = 0; i < 3; i++) { - a_[i].values[0] = ptr[i]; - } - simde_float64x1x3_t s_ = { { simde_float64x1_from_private(a_[0]), - simde_float64x1_from_private(a_[1]), - simde_float64x1_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld1_f64_x3 - #define vld1_f64_x3(a) simde_vld1_f64_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8x3_t -simde_vld1_s8_x3(int8_t const ptr[HEDLEY_ARRAY_PARAM(24)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_s8_x3(ptr); - #else - simde_int8x8_private a_[3]; - for (size_t i = 0; i < 24; i++) { - a_[i / 8].values[i % 8] = 
ptr[i]; - } - simde_int8x8x3_t s_ = { { simde_int8x8_from_private(a_[0]), - simde_int8x8_from_private(a_[1]), - simde_int8x8_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_s8_x3 - #define vld1_s8_x3(a) simde_vld1_s8_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4x3_t -simde_vld1_s16_x3(int16_t const ptr[HEDLEY_ARRAY_PARAM(12)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_s16_x3(ptr); - #else - simde_int16x4_private a_[3]; - for (size_t i = 0; i < 12; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_int16x4x3_t s_ = { { simde_int16x4_from_private(a_[0]), - simde_int16x4_from_private(a_[1]), - simde_int16x4_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_s16_x3 - #define vld1_s16_x3(a) simde_vld1_s16_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2x3_t -simde_vld1_s32_x3(int32_t const ptr[HEDLEY_ARRAY_PARAM(6)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_s32_x3(ptr); - #else - simde_int32x2_private a_[3]; - for (size_t i = 0; i < 6; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_int32x2x3_t s_ = { { simde_int32x2_from_private(a_[0]), - simde_int32x2_from_private(a_[1]), - simde_int32x2_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_s32_x3 - #define vld1_s32_x3(a) simde_vld1_s32_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1x3_t -simde_vld1_s64_x3(int64_t const ptr[HEDLEY_ARRAY_PARAM(3)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_s64_x3(ptr); - #else - simde_int64x1_private a_[3]; - for (size_t i = 0; i < 3; i++) { - a_[i].values[0] = ptr[i]; - } - simde_int64x1x3_t s_ = { { simde_int64x1_from_private(a_[0]), - simde_int64x1_from_private(a_[1]), - simde_int64x1_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_s64_x3 - #define vld1_s64_x3(a) simde_vld1_s64_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8x3_t -simde_vld1_u8_x3(uint8_t const ptr[HEDLEY_ARRAY_PARAM(24)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_u8_x3(ptr); - #else - simde_uint8x8_private a_[3]; - for (size_t i = 0; i < 24; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_uint8x8x3_t s_ = { { simde_uint8x8_from_private(a_[0]), - simde_uint8x8_from_private(a_[1]), - simde_uint8x8_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef 
vld1_u8_x3 - #define vld1_u8_x3(a) simde_vld1_u8_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4x3_t -simde_vld1_u16_x3(uint16_t const ptr[HEDLEY_ARRAY_PARAM(12)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_u16_x3(ptr); - #else - simde_uint16x4_private a_[3]; - for (size_t i = 0; i < 12; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_uint16x4x3_t s_ = { { simde_uint16x4_from_private(a_[0]), - simde_uint16x4_from_private(a_[1]), - simde_uint16x4_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_u16_x3 - #define vld1_u16_x3(a) simde_vld1_u16_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2x3_t -simde_vld1_u32_x3(uint32_t const ptr[HEDLEY_ARRAY_PARAM(6)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_u32_x3(ptr); - #else - simde_uint32x2_private a_[3]; - for (size_t i = 0; i < 6; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_uint32x2x3_t s_ = { { simde_uint32x2_from_private(a_[0]), - simde_uint32x2_from_private(a_[1]), - simde_uint32x2_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_u32_x3 - #define vld1_u32_x3(a) simde_vld1_u32_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1x3_t -simde_vld1_u64_x3(uint64_t const ptr[HEDLEY_ARRAY_PARAM(3)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_u64_x3(ptr); - #else - simde_uint64x1_private a_[3]; - for (size_t i = 0; i < 3; i++) { - a_[i].values[0] = ptr[i]; - } - simde_uint64x1x3_t s_ = { { simde_uint64x1_from_private(a_[0]), - simde_uint64x1_from_private(a_[1]), - simde_uint64x1_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_u64_x3 - #define vld1_u64_x3(a) simde_vld1_u64_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8x3_t -simde_vld1_p8_x3(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(24)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_p8_x3(ptr); - #else - simde_poly8x8_private a_[3]; - for (size_t i = 0; i < 24; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_poly8x8x3_t s_ = { { simde_poly8x8_from_private(a_[0]), - simde_poly8x8_from_private(a_[1]), - simde_poly8x8_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_p8_x3 - #define vld1_p8_x3(a) simde_vld1_p8_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4x3_t -simde_vld1_p16_x3(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(12)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && 
defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_p16_x3(ptr); - #else - simde_poly16x4_private a_[3]; - for (size_t i = 0; i < 12; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_poly16x4x3_t s_ = { { simde_poly16x4_from_private(a_[0]), - simde_poly16x4_from_private(a_[1]), - simde_poly16x4_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_p16_x3 - #define vld1_p16_x3(a) simde_vld1_p16_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1x3_t -simde_vld1_p64_x3(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(3)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_p64_x3(ptr); - #else - simde_poly64x1_private a_[3]; - for (size_t i = 0; i < 3; i++) { - a_[i].values[0] = ptr[i]; - } - simde_poly64x1x3_t s_ = { { simde_poly64x1_from_private(a_[0]), - simde_poly64x1_from_private(a_[1]), - simde_poly64x1_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1_p64_x3 - #define vld1_p64_x3(a) simde_vld1_p64_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x4x3_t -simde_vld1_bf16_x3(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(12)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld1_bf16_x3(ptr); - #else - simde_bfloat16x4_private a_[3]; - for (size_t i = 0; i < 12; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_bfloat16x4x3_t s_ = { { simde_bfloat16x4_from_private(a_[0]), - simde_bfloat16x4_from_private(a_[1]), - simde_bfloat16x4_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1_bf16_x3 - #define vld1_bf16_x3(a) simde_vld1_bf16_x3((a)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_LD1_X3_H) */ -/* :: End simde/arm/neon/ld1_x3.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ld1_x4.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2021 Décio Luiz Gazzoni Filho - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_LD1_X4_H) -#define SIMDE_ARM_NEON_LD1_X4_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4x4_t -simde_vld1_f16_x4(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_f16_x4(ptr); - #else - simde_float16x4_private a_[4]; - for (size_t i = 0; i < 16; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_float16x4x4_t s_ = { { simde_float16x4_from_private(a_[0]), - simde_float16x4_from_private(a_[1]), - simde_float16x4_from_private(a_[2]), - simde_float16x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_f16_x4 - #define vld1_f16_x4(a) simde_vld1_f16_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2x4_t -simde_vld1_f32_x4(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_f32_x4(ptr); - #else - simde_float32x2_private a_[4]; - for (size_t i = 0; i < 8; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_float32x2x4_t s_ = { { simde_float32x2_from_private(a_[0]), - simde_float32x2_from_private(a_[1]), - simde_float32x2_from_private(a_[2]), - simde_float32x2_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_f32_x4 - #define vld1_f32_x4(a) simde_vld1_f32_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1x4_t -simde_vld1_f64_x4(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if \ - defined(SIMDE_ARM_NEON_A64V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - return vld1_f64_x4(ptr); - #else - simde_float64x1_private a_[4]; - for (size_t i = 0; i < 4; i++) { - a_[i].values[0] = ptr[i]; - } - simde_float64x1x4_t s_ = { { simde_float64x1_from_private(a_[0]), - simde_float64x1_from_private(a_[1]), - simde_float64x1_from_private(a_[2]), - simde_float64x1_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld1_f64_x4 - #define vld1_f64_x4(a) simde_vld1_f64_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8x4_t -simde_vld1_s8_x4(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && 
defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_s8_x4(ptr); - #else - simde_int8x8_private a_[4]; - for (size_t i = 0; i < 32; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_int8x8x4_t s_ = { { simde_int8x8_from_private(a_[0]), - simde_int8x8_from_private(a_[1]), - simde_int8x8_from_private(a_[2]), - simde_int8x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_s8_x4 - #define vld1_s8_x4(a) simde_vld1_s8_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4x4_t -simde_vld1_s16_x4(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_s16_x4(ptr); - #else - simde_int16x4_private a_[4]; - for (size_t i = 0; i < 16; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_int16x4x4_t s_ = { { simde_int16x4_from_private(a_[0]), - simde_int16x4_from_private(a_[1]), - simde_int16x4_from_private(a_[2]), - simde_int16x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_s16_x4 - #define vld1_s16_x4(a) simde_vld1_s16_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2x4_t -simde_vld1_s32_x4(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_s32_x4(ptr); - #else - simde_int32x2_private a_[4]; - for (size_t i = 0; i < 8; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_int32x2x4_t s_ = { { simde_int32x2_from_private(a_[0]), - simde_int32x2_from_private(a_[1]), - simde_int32x2_from_private(a_[2]), - simde_int32x2_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_s32_x4 - #define vld1_s32_x4(a) simde_vld1_s32_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1x4_t -simde_vld1_s64_x4(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_s64_x4(ptr); - #else - simde_int64x1_private a_[4]; - for (size_t i = 0; i < 4; i++) { - a_[i].values[0] = ptr[i]; - } - simde_int64x1x4_t s_ = { { simde_int64x1_from_private(a_[0]), - simde_int64x1_from_private(a_[1]), - simde_int64x1_from_private(a_[2]), - simde_int64x1_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_s64_x4 - #define vld1_s64_x4(a) simde_vld1_s64_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8x4_t -simde_vld1_u8_x4(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_u8_x4(ptr); - #else - simde_uint8x8_private a_[4]; - 
for (size_t i = 0; i < 32; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_uint8x8x4_t s_ = { { simde_uint8x8_from_private(a_[0]), - simde_uint8x8_from_private(a_[1]), - simde_uint8x8_from_private(a_[2]), - simde_uint8x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_u8_x4 - #define vld1_u8_x4(a) simde_vld1_u8_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4x4_t -simde_vld1_u16_x4(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_u16_x4(ptr); - #else - simde_uint16x4_private a_[4]; - for (size_t i = 0; i < 16; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_uint16x4x4_t s_ = { { simde_uint16x4_from_private(a_[0]), - simde_uint16x4_from_private(a_[1]), - simde_uint16x4_from_private(a_[2]), - simde_uint16x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_u16_x4 - #define vld1_u16_x4(a) simde_vld1_u16_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2x4_t -simde_vld1_u32_x4(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_u32_x4(ptr); - #else - simde_uint32x2_private a_[4]; - for (size_t i = 0; i < 8; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_uint32x2x4_t s_ = { { simde_uint32x2_from_private(a_[0]), - simde_uint32x2_from_private(a_[1]), - simde_uint32x2_from_private(a_[2]), - simde_uint32x2_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_u32_x4 - #define vld1_u32_x4(a) simde_vld1_u32_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1x4_t -simde_vld1_u64_x4(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_u64_x4(ptr); - #else - simde_uint64x1_private a_[4]; - for (size_t i = 0; i < 4; i++) { - a_[i].values[0] = ptr[i]; - } - simde_uint64x1x4_t s_ = { { simde_uint64x1_from_private(a_[0]), - simde_uint64x1_from_private(a_[1]), - simde_uint64x1_from_private(a_[2]), - simde_uint64x1_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_u64_x4 - #define vld1_u64_x4(a) simde_vld1_u64_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8x4_t -simde_vld1_p8_x4(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_p8_x4(ptr); - #else - simde_poly8x8_private a_[4]; - for (size_t i = 0; i < 32; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_poly8x8x4_t s_ = { { simde_poly8x8_from_private(a_[0]), - simde_poly8x8_from_private(a_[1]), - 
simde_poly8x8_from_private(a_[2]), - simde_poly8x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_p8_x4 - #define vld1_p8_x4(a) simde_vld1_p8_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4x4_t -simde_vld1_p16_x4(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_p16_x4(ptr); - #else - simde_poly16x4_private a_[4]; - for (size_t i = 0; i < 16; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_poly16x4x4_t s_ = { { simde_poly16x4_from_private(a_[0]), - simde_poly16x4_from_private(a_[1]), - simde_poly16x4_from_private(a_[2]), - simde_poly16x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1_p16_x4 - #define vld1_p16_x4(a) simde_vld1_p16_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1x4_t -simde_vld1_p64_x4(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1_p64_x4(ptr); - #else - simde_poly64x1_private a_[4]; - for (size_t i = 0; i < 4; i++) { - a_[i].values[0] = ptr[i]; - } - simde_poly64x1x4_t s_ = { { simde_poly64x1_from_private(a_[0]), - simde_poly64x1_from_private(a_[1]), - simde_poly64x1_from_private(a_[2]), - simde_poly64x1_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1_p64_x4 - #define vld1_p64_x4(a) simde_vld1_p64_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x4x4_t -simde_vld1_bf16_x4(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld1_bf16_x4(ptr); - #else - simde_bfloat16x4_private a_[4]; - for (size_t i = 0; i < 16; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_bfloat16x4x4_t s_ = { { simde_bfloat16x4_from_private(a_[0]), - simde_bfloat16x4_from_private(a_[1]), - simde_bfloat16x4_from_private(a_[2]), - simde_bfloat16x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1_bf16_x4 - #define vld1_bf16_x4(a) simde_vld1_bf16_x4((a)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_LD1_X4_H) */ -/* :: End simde/arm/neon/ld1_x4.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ld1q_x2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2021 Décio Luiz Gazzoni Filho - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_LD1Q_X2_H) -#define SIMDE_ARM_NEON_LD1Q_X2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8x2_t -simde_vld1q_f16_x2(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - defined(SIMDE_ARM_NEON_FP16) - return vld1q_f16_x2(ptr); - #else - simde_float16x8_private a_[2]; - for (size_t i = 0; i < 16; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_float16x8x2_t s_ = { { simde_float16x8_from_private(a_[0]), - simde_float16x8_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_f16_x2 - #define vld1q_f16_x2(a) simde_vld1q_f16_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4x2_t -simde_vld1q_f32_x2(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_f32_x2(ptr); - #else - simde_float32x4_private a_[2]; - for (size_t i = 0; i < 8; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_float32x4x2_t s_ = { { simde_float32x4_from_private(a_[0]), - simde_float32x4_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_f32_x2 - #define vld1q_f32_x2(a) simde_vld1q_f32_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2x2_t -simde_vld1q_f64_x2(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if \ - defined(SIMDE_ARM_NEON_A64V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - return vld1q_f64_x2(ptr); - #else - simde_float64x2_private a_[2]; - for (size_t i = 0; i < 4; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_float64x2x2_t s_ = { { simde_float64x2_from_private(a_[0]), - simde_float64x2_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_f64_x2 - #define vld1q_f64_x2(a) simde_vld1q_f64_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde_int8x16x2_t -simde_vld1q_s8_x2(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_s8_x2(ptr); - #else - simde_int8x16_private a_[2]; - for (size_t i = 0; i < 32; i++) { - a_[i / 16].values[i % 16] = ptr[i]; - } - simde_int8x16x2_t s_ = { { simde_int8x16_from_private(a_[0]), - simde_int8x16_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_s8_x2 - #define vld1q_s8_x2(a) simde_vld1q_s8_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8x2_t -simde_vld1q_s16_x2(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_s16_x2(ptr); - #else - simde_int16x8_private a_[2]; - for (size_t i = 0; i < 16; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_int16x8x2_t s_ = { { simde_int16x8_from_private(a_[0]), - simde_int16x8_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_s16_x2 - #define vld1q_s16_x2(a) simde_vld1q_s16_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4x2_t -simde_vld1q_s32_x2(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_s32_x2(ptr); - #else - simde_int32x4_private a_[2]; - for (size_t i = 0; i < 8; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_int32x4x2_t s_ = { { simde_int32x4_from_private(a_[0]), - simde_int32x4_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_s32_x2 - #define vld1q_s32_x2(a) simde_vld1q_s32_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2x2_t -simde_vld1q_s64_x2(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_s64_x2(ptr); - #else - simde_int64x2_private a_[2]; - for (size_t i = 0; i < 4; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_int64x2x2_t s_ = { { simde_int64x2_from_private(a_[0]), - simde_int64x2_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_s64_x2 - #define vld1q_s64_x2(a) simde_vld1q_s64_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16x2_t -simde_vld1q_u8_x2(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - 
return vld1q_u8_x2(ptr); - #else - simde_uint8x16_private a_[2]; - for (size_t i = 0; i < 32; i++) { - a_[i / 16].values[i % 16] = ptr[i]; - } - simde_uint8x16x2_t s_ = { { simde_uint8x16_from_private(a_[0]), - simde_uint8x16_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_u8_x2 - #define vld1q_u8_x2(a) simde_vld1q_u8_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8x2_t -simde_vld1q_u16_x2(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_u16_x2(ptr); - #else - simde_uint16x8_private a_[2]; - for (size_t i = 0; i < 16; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_uint16x8x2_t s_ = { { simde_uint16x8_from_private(a_[0]), - simde_uint16x8_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_u16_x2 - #define vld1q_u16_x2(a) simde_vld1q_u16_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4x2_t -simde_vld1q_u32_x2(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_u32_x2(ptr); - #else - simde_uint32x4_private a_[2]; - for (size_t i = 0; i < 8; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_uint32x4x2_t s_ = { { simde_uint32x4_from_private(a_[0]), - simde_uint32x4_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_u32_x2 - #define vld1q_u32_x2(a) simde_vld1q_u32_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2x2_t -simde_vld1q_u64_x2(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_u64_x2(ptr); - #else - simde_uint64x2_private a_[2]; - for (size_t i = 0; i < 4; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_uint64x2x2_t s_ = { { simde_uint64x2_from_private(a_[0]), - simde_uint64x2_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_u64_x2 - #define vld1q_u64_x2(a) simde_vld1q_u64_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16x2_t -simde_vld1q_p8_x2(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_p8_x2(ptr); - #else - simde_poly8x16_private a_[2]; - for (size_t i = 0; i < 32; i++) { - a_[i / 16].values[i % 16] = ptr[i]; - } - simde_poly8x16x2_t s_ = { { simde_poly8x16_from_private(a_[0]), - simde_poly8x16_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_p8_x2 - #define vld1q_p8_x2(a) simde_vld1q_p8_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde_poly16x8x2_t -simde_vld1q_p16_x2(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_p16_x2(ptr); - #else - simde_poly16x8_private a_[2]; - for (size_t i = 0; i < 16; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_poly16x8x2_t s_ = { { simde_poly16x8_from_private(a_[0]), - simde_poly16x8_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_p16_x2 - #define vld1q_p16_x2(a) simde_vld1q_p16_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2x2_t -simde_vld1q_p64_x2(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_p64_x2(ptr); - #else - simde_poly64x2_private a_[2]; - for (size_t i = 0; i < 4; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_poly64x2x2_t s_ = { { simde_poly64x2_from_private(a_[0]), - simde_poly64x2_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_p64_x2 - #define vld1q_p64_x2(a) simde_vld1q_p64_x2((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8x2_t -simde_vld1q_bf16_x2(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld1q_bf16_x2(ptr); - #else - simde_bfloat16x8_private a_[2]; - for (size_t i = 0; i < 16; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_bfloat16x8x2_t s_ = { { simde_bfloat16x8_from_private(a_[0]), - simde_bfloat16x8_from_private(a_[1]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_bf16_x2 - #define vld1q_bf16_x2(a) simde_vld1q_bf16_x2((a)) -#endif - - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_LD1Q_X2_H) */ -/* :: End simde/arm/neon/ld1q_x2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ld1q_x3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_LD1Q_X3_H) -#define SIMDE_ARM_NEON_LD1Q_X3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8x3_t -simde_vld1q_f16_x3(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(24)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_f16_x3(ptr); - #else - simde_float16x8_private a_[3]; - for (size_t i = 0; i < 24; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_float16x8x3_t s_ = { { simde_float16x8_from_private(a_[0]), - simde_float16x8_from_private(a_[1]), - simde_float16x8_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_f16_x3 - #define vld1q_f16_x3(a) simde_vld1q_f16_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4x3_t -simde_vld1q_f32_x3(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(12)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_f32_x3(ptr); - #else - simde_float32x4_private a_[3]; - for (size_t i = 0; i < 12; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_float32x4x3_t s_ = { { simde_float32x4_from_private(a_[0]), - simde_float32x4_from_private(a_[1]), - simde_float32x4_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_f32_x3 - #define vld1q_f32_x3(a) simde_vld1q_f32_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2x3_t -simde_vld1q_f64_x3(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(6)]) { - #if \ - defined(SIMDE_ARM_NEON_A64V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - return vld1q_f64_x3(ptr); - #else - simde_float64x2_private a_[3]; - for (size_t i = 0; i < 6; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_float64x2x3_t s_ = { { simde_float64x2_from_private(a_[0]), - simde_float64x2_from_private(a_[1]), - simde_float64x2_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_f64_x3 - #define vld1q_f64_x3(a) simde_vld1q_f64_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16x3_t -simde_vld1q_s8_x3(int8_t const ptr[HEDLEY_ARRAY_PARAM(48)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_s8_x3(ptr); - #else - simde_int8x16_private a_[3]; - for (size_t i = 0; i < 48; i++) { - 
a_[i / 16].values[i % 16] = ptr[i]; - } - simde_int8x16x3_t s_ = { { simde_int8x16_from_private(a_[0]), - simde_int8x16_from_private(a_[1]), - simde_int8x16_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_s8_x3 - #define vld1q_s8_x3(a) simde_vld1q_s8_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8x3_t -simde_vld1q_s16_x3(int16_t const ptr[HEDLEY_ARRAY_PARAM(12)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_s16_x3(ptr); - #else - simde_int16x8_private a_[3]; - for (size_t i = 0; i < 24; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_int16x8x3_t s_ = { { simde_int16x8_from_private(a_[0]), - simde_int16x8_from_private(a_[1]), - simde_int16x8_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_s16_x3 - #define vld1q_s16_x3(a) simde_vld1q_s16_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4x3_t -simde_vld1q_s32_x3(int32_t const ptr[HEDLEY_ARRAY_PARAM(6)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_s32_x3(ptr); - #else - simde_int32x4_private a_[3]; - for (size_t i = 0; i < 12; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_int32x4x3_t s_ = { { simde_int32x4_from_private(a_[0]), - simde_int32x4_from_private(a_[1]), - simde_int32x4_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_s32_x3 - #define vld1q_s32_x3(a) simde_vld1q_s32_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2x3_t -simde_vld1q_s64_x3(int64_t const ptr[HEDLEY_ARRAY_PARAM(3)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_s64_x3(ptr); - #else - simde_int64x2_private a_[3]; - for (size_t i = 0; i < 6; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_int64x2x3_t s_ = { { simde_int64x2_from_private(a_[0]), - simde_int64x2_from_private(a_[1]), - simde_int64x2_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_s64_x3 - #define vld1q_s64_x3(a) simde_vld1q_s64_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16x3_t -simde_vld1q_u8_x3(uint8_t const ptr[HEDLEY_ARRAY_PARAM(48)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_u8_x3(ptr); - #else - simde_uint8x16_private a_[3]; - for (size_t i = 0; i < 48; i++) { - a_[i / 16].values[i % 16] = ptr[i]; - } - simde_uint8x16x3_t s_ = { { simde_uint8x16_from_private(a_[0]), - simde_uint8x16_from_private(a_[1]), - simde_uint8x16_from_private(a_[2]) } }; - return s_; - #endif -} 
-#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_u8_x3 - #define vld1q_u8_x3(a) simde_vld1q_u8_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8x3_t -simde_vld1q_u16_x3(uint16_t const ptr[HEDLEY_ARRAY_PARAM(24)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_u16_x3(ptr); - #else - simde_uint16x8_private a_[3]; - for (size_t i = 0; i < 24; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_uint16x8x3_t s_ = { { simde_uint16x8_from_private(a_[0]), - simde_uint16x8_from_private(a_[1]), - simde_uint16x8_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_u16_x3 - #define vld1q_u16_x3(a) simde_vld1q_u16_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4x3_t -simde_vld1q_u32_x3(uint32_t const ptr[HEDLEY_ARRAY_PARAM(6)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_u32_x3(ptr); - #else - simde_uint32x4_private a_[3]; - for (size_t i = 0; i < 12; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_uint32x4x3_t s_ = { { simde_uint32x4_from_private(a_[0]), - simde_uint32x4_from_private(a_[1]), - simde_uint32x4_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_u32_x3 - #define vld1q_u32_x3(a) simde_vld1q_u32_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2x3_t -simde_vld1q_u64_x3(uint64_t const ptr[HEDLEY_ARRAY_PARAM(3)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_u64_x3(ptr); - #else - simde_uint64x2_private a_[3]; - for (size_t i = 0; i < 6; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_uint64x2x3_t s_ = { { simde_uint64x2_from_private(a_[0]), - simde_uint64x2_from_private(a_[1]), - simde_uint64x2_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_u64_x3 - #define vld1q_u64_x3(a) simde_vld1q_u64_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16x3_t -simde_vld1q_p8_x3(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(48)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_p8_x3(ptr); - #else - simde_poly8x16_private a_[3]; - for (size_t i = 0; i < 48; i++) { - a_[i / 16].values[i % 16] = ptr[i]; - } - simde_poly8x16x3_t s_ = { { simde_poly8x16_from_private(a_[0]), - simde_poly8x16_from_private(a_[1]), - simde_poly8x16_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_p8_x3 - #define vld1q_p8_x3(a) simde_vld1q_p8_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8x3_t -simde_vld1q_p16_x3(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(24)]) { - #if \ - 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_p16_x3(ptr); - #else - simde_poly16x8_private a_[3]; - for (size_t i = 0; i < 24; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_poly16x8x3_t s_ = { { simde_poly16x8_from_private(a_[0]), - simde_poly16x8_from_private(a_[1]), - simde_poly16x8_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_p16_x3 - #define vld1q_p16_x3(a) simde_vld1q_p16_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2x3_t -simde_vld1q_p64_x3(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(3)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_p64_x3(ptr); - #else - simde_poly64x2_private a_[3]; - for (size_t i = 0; i < 6; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_poly64x2x3_t s_ = { { simde_poly64x2_from_private(a_[0]), - simde_poly64x2_from_private(a_[1]), - simde_poly64x2_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_p64_x3 - #define vld1q_p64_x3(a) simde_vld1q_p64_x3((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8x3_t -simde_vld1q_bf16_x3(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(24)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld1q_bf16_x3(ptr); - #else - simde_bfloat16x8_private a_[3]; - for (size_t i = 0; i < 24; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_bfloat16x8x3_t s_ = { { simde_bfloat16x8_from_private(a_[0]), - simde_bfloat16x8_from_private(a_[1]), - simde_bfloat16x8_from_private(a_[2]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_bf16_x3 - #define vld1q_bf16_x3(a) simde_vld1q_bf16_x3((a)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_LD1Q_X3_H) */ -/* :: End simde/arm/neon/ld1q_x3.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ld1q_x4.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2021 Décio Luiz Gazzoni Filho - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_LD1Q_X4_H) -#define SIMDE_ARM_NEON_LD1Q_X4_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8x4_t -simde_vld1q_f16_x4(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_f16_x4(ptr); - #else - simde_float16x8_private a_[4]; - for (size_t i = 0; i < 32; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_float16x8x4_t s_ = { { simde_float16x8_from_private(a_[0]), - simde_float16x8_from_private(a_[1]), - simde_float16x8_from_private(a_[2]), - simde_float16x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_f16_x4 - #define vld1q_f16_x4(a) simde_vld1q_f16_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4x4_t -simde_vld1q_f32_x4(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_f32_x4(ptr); - #else - simde_float32x4_private a_[4]; - for (size_t i = 0; i < 16; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_float32x4x4_t s_ = { { simde_float32x4_from_private(a_[0]), - simde_float32x4_from_private(a_[1]), - simde_float32x4_from_private(a_[2]), - simde_float32x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_f32_x4 - #define vld1q_f32_x4(a) simde_vld1q_f32_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2x4_t -simde_vld1q_f64_x4(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if \ - defined(SIMDE_ARM_NEON_A64V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - return vld1q_f64_x4(ptr); - #else - simde_float64x2_private a_[4]; - for (size_t i = 0; i < 8; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_float64x2x4_t s_ = { { simde_float64x2_from_private(a_[0]), - simde_float64x2_from_private(a_[1]), - simde_float64x2_from_private(a_[2]), - simde_float64x2_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_f64_x4 - #define vld1q_f64_x4(a) simde_vld1q_f64_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16x4_t -simde_vld1q_s8_x4(int8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || 
(SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_s8_x4(ptr); - #else - simde_int8x16_private a_[4]; - for (size_t i = 0; i < 64; i++) { - a_[i / 16].values[i % 16] = ptr[i]; - } - simde_int8x16x4_t s_ = { { simde_int8x16_from_private(a_[0]), - simde_int8x16_from_private(a_[1]), - simde_int8x16_from_private(a_[2]), - simde_int8x16_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_s8_x4 - #define vld1q_s8_x4(a) simde_vld1q_s8_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8x4_t -simde_vld1q_s16_x4(int16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_s16_x4(ptr); - #else - simde_int16x8_private a_[4]; - for (size_t i = 0; i < 32; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_int16x8x4_t s_ = { { simde_int16x8_from_private(a_[0]), - simde_int16x8_from_private(a_[1]), - simde_int16x8_from_private(a_[2]), - simde_int16x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_s16_x4 - #define vld1q_s16_x4(a) simde_vld1q_s16_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4x4_t -simde_vld1q_s32_x4(int32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_s32_x4(ptr); - #else - simde_int32x4_private a_[4]; - for (size_t i = 0; i < 16; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_int32x4x4_t s_ = { { simde_int32x4_from_private(a_[0]), - simde_int32x4_from_private(a_[1]), - simde_int32x4_from_private(a_[2]), - simde_int32x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_s32_x4 - #define vld1q_s32_x4(a) simde_vld1q_s32_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2x4_t -simde_vld1q_s64_x4(int64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_s64_x4(ptr); - #else - simde_int64x2_private a_[4]; - for (size_t i = 0; i < 8; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_int64x2x4_t s_ = { { simde_int64x2_from_private(a_[0]), - simde_int64x2_from_private(a_[1]), - simde_int64x2_from_private(a_[2]), - simde_int64x2_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_s64_x4 - #define vld1q_s64_x4(a) simde_vld1q_s64_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16x4_t -simde_vld1q_u8_x4(uint8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && 
defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_u8_x4(ptr); - #else - simde_uint8x16_private a_[4]; - for (size_t i = 0; i < 64; i++) { - a_[i / 16].values[i % 16] = ptr[i]; - } - simde_uint8x16x4_t s_ = { { simde_uint8x16_from_private(a_[0]), - simde_uint8x16_from_private(a_[1]), - simde_uint8x16_from_private(a_[2]), - simde_uint8x16_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_u8_x4 - #define vld1q_u8_x4(a) simde_vld1q_u8_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8x4_t -simde_vld1q_u16_x4(uint16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_u16_x4(ptr); - #else - simde_uint16x8_private a_[4]; - for (size_t i = 0; i < 32; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_uint16x8x4_t s_ = { { simde_uint16x8_from_private(a_[0]), - simde_uint16x8_from_private(a_[1]), - simde_uint16x8_from_private(a_[2]), - simde_uint16x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_u16_x4 - #define vld1q_u16_x4(a) simde_vld1q_u16_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4x4_t -simde_vld1q_u32_x4(uint32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_u32_x4(ptr); - #else - simde_uint32x4_private a_[4]; - for (size_t i = 0; i < 16; i++) { - a_[i / 4].values[i % 4] = ptr[i]; - } - simde_uint32x4x4_t s_ = { { simde_uint32x4_from_private(a_[0]), - simde_uint32x4_from_private(a_[1]), - simde_uint32x4_from_private(a_[2]), - simde_uint32x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_u32_x4 - #define vld1q_u32_x4(a) simde_vld1q_u32_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2x4_t -simde_vld1q_u64_x4(uint64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_u64_x4(ptr); - #else - simde_uint64x2_private a_[4]; - for (size_t i = 0; i < 8; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_uint64x2x4_t s_ = { { simde_uint64x2_from_private(a_[0]), - simde_uint64x2_from_private(a_[1]), - simde_uint64x2_from_private(a_[2]), - simde_uint64x2_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_u64_x4 - #define vld1q_u64_x4(a) simde_vld1q_u64_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16x4_t -simde_vld1q_p8_x4(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_p8_x4(ptr); - #else - simde_poly8x16_private a_[4]; - for (size_t i = 0; i < 64; i++) { - 
a_[i / 16].values[i % 16] = ptr[i]; - } - simde_poly8x16x4_t s_ = { { simde_poly8x16_from_private(a_[0]), - simde_poly8x16_from_private(a_[1]), - simde_poly8x16_from_private(a_[2]), - simde_poly8x16_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_p8_x4 - #define vld1q_p8_x4(a) simde_vld1q_p8_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8x4_t -simde_vld1q_p16_x4(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_p16_x4(ptr); - #else - simde_poly16x8_private a_[4]; - for (size_t i = 0; i < 32; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_poly16x8x4_t s_ = { { simde_poly16x8_from_private(a_[0]), - simde_poly16x8_from_private(a_[1]), - simde_poly16x8_from_private(a_[2]), - simde_poly16x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld1q_p16_x4 - #define vld1q_p16_x4(a) simde_vld1q_p16_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2x4_t -simde_vld1q_p64_x4(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if \ - defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - return vld1q_p64_x4(ptr); - #else - simde_poly64x2_private a_[4]; - for (size_t i = 0; i < 8; i++) { - a_[i / 2].values[i % 2] = ptr[i]; - } - simde_poly64x2x4_t s_ = { { simde_poly64x2_from_private(a_[0]), - simde_poly64x2_from_private(a_[1]), - simde_poly64x2_from_private(a_[2]), - simde_poly64x2_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_p64_x4 - #define vld1q_p64_x4(a) simde_vld1q_p64_x4((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8x4_t -simde_vld1q_bf16_x4(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld1q_bf16_x4(ptr); - #else - simde_bfloat16x8_private a_[4]; - for (size_t i = 0; i < 32; i++) { - a_[i / 8].values[i % 8] = ptr[i]; - } - simde_bfloat16x8x4_t s_ = { { simde_bfloat16x8_from_private(a_[0]), - simde_bfloat16x8_from_private(a_[1]), - simde_bfloat16x8_from_private(a_[2]), - simde_bfloat16x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld1q_bf16_x4 - #define vld1q_bf16_x4(a) simde_vld1q_bf16_x4((a)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_LD1Q_X4_H) */ -/* :: End simde/arm/neon/ld1q_x4.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ld2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or 
substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_LD2_H) -#define SIMDE_ARM_NEON_LD2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/uzp.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_UZP_H) && !defined(SIMDE_BUG_INTEL_857088) -#define SIMDE_ARM_NEON_UZP_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4x2_t -simde_vuzp_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vuzp_f16(a, b); - #else - simde_float16x4x2_t r = { { simde_vuzp1_f16(a, b), simde_vuzp2_f16(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzp_f16 - #define vuzp_f16(a, b) simde_vuzp_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2x2_t -simde_vuzp_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzp_f32(a, b); - #else - simde_float32x2x2_t r = { { simde_vuzp1_f32(a, b), simde_vuzp2_f32(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzp_f32 - #define vuzp_f32(a, b) simde_vuzp_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8x2_t -simde_vuzp_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzp_s8(a, b); - #else - simde_int8x8x2_t r = { { simde_vuzp1_s8(a, b), simde_vuzp2_s8(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzp_s8 - #define vuzp_s8(a, b) simde_vuzp_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4x2_t -simde_vuzp_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzp_s16(a, b); - #else - simde_int16x4x2_t r = { { simde_vuzp1_s16(a, b), simde_vuzp2_s16(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzp_s16 - #define vuzp_s16(a, b) simde_vuzp_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2x2_t -simde_vuzp_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzp_s32(a, b); - #else - simde_int32x2x2_t r = { { simde_vuzp1_s32(a, b), simde_vuzp2_s32(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzp_s32 - #define vuzp_s32(a, b) simde_vuzp_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8x2_t -simde_vuzp_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzp_u8(a, b); - #else - simde_uint8x8x2_t r = { { simde_vuzp1_u8(a, b), simde_vuzp2_u8(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzp_u8 - #define vuzp_u8(a, b) simde_vuzp_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4x2_t -simde_vuzp_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzp_u16(a, b); - #else - simde_uint16x4x2_t r = { { simde_vuzp1_u16(a, b), simde_vuzp2_u16(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzp_u16 - #define vuzp_u16(a, b) 
simde_vuzp_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2x2_t -simde_vuzp_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzp_u32(a, b); - #else - simde_uint32x2x2_t r = { { simde_vuzp1_u32(a, b), simde_vuzp2_u32(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzp_u32 - #define vuzp_u32(a, b) simde_vuzp_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8x2_t -simde_vuzpq_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vuzpq_f16(a, b); - #else - simde_float16x8x2_t r = { { simde_vuzp1q_f16(a, b), simde_vuzp2q_f16(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzpq_f16 - #define vuzpq_f16(a, b) simde_vuzpq_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4x2_t -simde_vuzpq_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzpq_f32(a, b); - #else - simde_float32x4x2_t r = { { simde_vuzp1q_f32(a, b), simde_vuzp2q_f32(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzpq_f32 - #define vuzpq_f32(a, b) simde_vuzpq_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16x2_t -simde_vuzpq_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzpq_s8(a, b); - #else - simde_int8x16x2_t r = { { simde_vuzp1q_s8(a, b), simde_vuzp2q_s8(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzpq_s8 - #define vuzpq_s8(a, b) simde_vuzpq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8x2_t -simde_vuzpq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzpq_s16(a, b); - #else - simde_int16x8x2_t r = { { simde_vuzp1q_s16(a, b), simde_vuzp2q_s16(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzpq_s16 - #define vuzpq_s16(a, b) simde_vuzpq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4x2_t -simde_vuzpq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzpq_s32(a, b); - #else - simde_int32x4x2_t r = { { simde_vuzp1q_s32(a, b), simde_vuzp2q_s32(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzpq_s32 - #define vuzpq_s32(a, b) simde_vuzpq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16x2_t -simde_vuzpq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzpq_u8(a, b); - #else - simde_uint8x16x2_t r = { { simde_vuzp1q_u8(a, b), simde_vuzp2q_u8(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzpq_u8 - #define vuzpq_u8(a, b) simde_vuzpq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8x2_t -simde_vuzpq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzpq_u16(a, b); - #else - simde_uint16x8x2_t r = { { simde_vuzp1q_u16(a, b), simde_vuzp2q_u16(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzpq_u16 - #define vuzpq_u16(a, b) simde_vuzpq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4x2_t -simde_vuzpq_u32(simde_uint32x4_t a, simde_uint32x4_t b) 
{ - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzpq_u32(a, b); - #else - simde_uint32x4x2_t r = { { simde_vuzp1q_u32(a, b), simde_vuzp2q_u32(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzpq_u32 - #define vuzpq_u32(a, b) simde_vuzpq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8x2_t -simde_vuzp_p8(simde_poly8x8_t a, simde_poly8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzp_p8(a, b); - #else - simde_poly8x8x2_t r = { { simde_vuzp1_p8(a, b), simde_vuzp2_p8(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzp_p8 - #define vuzp_p8(a, b) simde_vuzp_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4x2_t -simde_vuzp_p16(simde_poly16x4_t a, simde_poly16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzp_p16(a, b); - #else - simde_poly16x4x2_t r = { { simde_vuzp1_p16(a, b), simde_vuzp2_p16(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzp_p16 - #define vuzp_p16(a, b) simde_vuzp_p16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16x2_t -simde_vuzpq_p8(simde_poly8x16_t a, simde_poly8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzpq_p8(a, b); - #else - simde_poly8x16x2_t r = { { simde_vuzp1q_p8(a, b), simde_vuzp2q_p8(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzpq_p8 - #define vuzpq_p8(a, b) simde_vuzpq_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8x2_t -simde_vuzpq_p16(simde_poly16x8_t a, simde_poly16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vuzpq_p16(a, b); - #else - simde_poly16x8x2_t r = { { simde_vuzp1q_p16(a, b), simde_vuzp2q_p16(a, b) } }; - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vuzpq_p16 - #define vuzpq_p16(a, b) simde_vuzpq_p16((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_UZP_H) */ -/* :: End simde/arm/neon/uzp.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8x2_t -simde_vld2_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_s8(ptr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t a = wasm_v128_load(ptr); - simde_int8x16_private q_; - q_.v128 = wasm_i8x16_shuffle(a, a, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); - simde_int8x16_t q = simde_int8x16_from_private(q_); - - simde_int8x8x2_t u = { - simde_vget_low_s8(q), - simde_vget_high_s8(q) - }; - return u; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) - simde_int8x16_private a_ = simde_int8x16_to_private(simde_vld1q_s8(ptr)); - a_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); - simde_int8x8x2_t r; - simde_memcpy(&r, &a_, sizeof(r)); - return r; - #else - simde_int8x8_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_int8x8x2_t r = { { - simde_int8x8_from_private(r_[0]), - simde_int8x8_from_private(r_[1]), 
- } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_s8 - #define vld2_s8(a) simde_vld2_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4x2_t -simde_vld2_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_s16(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) - simde_int16x8_private a_ = simde_int16x8_to_private(simde_vld1q_s16(ptr)); - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 0, 2, 4, 6, 1, 3, 5, 7); - simde_int16x4x2_t r; - simde_memcpy(&r, &a_, sizeof(r)); - return r; - #else - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ - #endif - simde_int16x4_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_POP - #endif - - simde_int16x4x2_t r = { { - simde_int16x4_from_private(r_[0]), - simde_int16x4_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_s16 - #define vld2_s16(a) simde_vld2_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2x2_t -simde_vld2_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_s32(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) - simde_int32x4_private a_ = simde_int32x4_to_private(simde_vld1q_s32(ptr)); - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 2, 1, 3); - simde_int32x2x2_t r; - simde_memcpy(&r, &a_, sizeof(r)); - return r; - #else - simde_int32x2_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_int32x2x2_t r = { { - simde_int32x2_from_private(r_[0]), - simde_int32x2_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_s32 - #define vld2_s32(a) simde_vld2_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1x2_t -simde_vld2_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_s64(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) - simde_int64x2_private a_ = simde_int64x2_to_private(simde_vld1q_s64(ptr)); - a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 1); - simde_int64x1x2_t r; - simde_memcpy(&r, &a_, sizeof(r)); - return r; - #else - simde_int64x1_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_int64x1x2_t r = { { - simde_int64x1_from_private(r_[0]), - simde_int64x1_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_s64 - #define vld2_s64(a) simde_vld2_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8x2_t -simde_vld2_u8(uint8_t const 
ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_u8(ptr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t a = wasm_v128_load(ptr); - simde_uint8x16_private q_; - q_.v128 = wasm_i8x16_shuffle(a, a, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); - simde_uint8x16_t q = simde_uint8x16_from_private(q_); - - simde_uint8x8x2_t u = { - simde_vget_low_u8(q), - simde_vget_high_u8(q) - }; - return u; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) - simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_vld1q_u8(ptr)); - a_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); - simde_uint8x8x2_t r; - simde_memcpy(&r, &a_, sizeof(r)); - return r; - #else - simde_uint8x8_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_uint8x8x2_t r = { { - simde_uint8x8_from_private(r_[0]), - simde_uint8x8_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_u8 - #define vld2_u8(a) simde_vld2_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4x2_t -simde_vld2_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_u16(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) - simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_vld1q_u16(ptr)); - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 0, 2, 4, 6, 1, 3, 5, 7); - simde_uint16x4x2_t r; - simde_memcpy(&r, &a_, sizeof(r)); - return r; - #else - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ - #endif - simde_uint16x4_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_POP - #endif - - simde_uint16x4x2_t r = { { - simde_uint16x4_from_private(r_[0]), - simde_uint16x4_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_u16 - #define vld2_u16(a) simde_vld2_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2x2_t -simde_vld2_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_u32(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) - simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_vld1q_u32(ptr)); - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 2, 1, 3); - simde_uint32x2x2_t r; - simde_memcpy(&r, &a_, sizeof(r)); - return r; - #else - simde_uint32x2_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_uint32x2x2_t r = { { - simde_uint32x2_from_private(r_[0]), - simde_uint32x2_from_private(r_[1]), - } }; - - return r; - #endif -} -#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_u32 - #define vld2_u32(a) simde_vld2_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1x2_t -simde_vld2_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_u64(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) - simde_uint64x2_private a_ = simde_uint64x2_to_private(simde_vld1q_u64(ptr)); - a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 1); - simde_uint64x1x2_t r; - simde_memcpy(&r, &a_, sizeof(r)); - return r; - #else - simde_uint64x1_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_uint64x1x2_t r = { { - simde_uint64x1_from_private(r_[0]), - simde_uint64x1_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_u64 - #define vld2_u64(a) simde_vld2_u64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4x2_t -simde_vld2_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vld2_f16(ptr); - #else - simde_float16x4_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_float16x4x2_t r = { { - simde_float16x4_from_private(r_[0]), - simde_float16x4_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_f16 - #define vld2_f16(a) simde_vld2_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2x2_t -simde_vld2_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_f32(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) - simde_float32x4_private a_ = simde_float32x4_to_private(simde_vld1q_f32(ptr)); - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 2, 1, 3); - simde_float32x2x2_t r; - simde_memcpy(&r, &a_, sizeof(r)); - return r; - #else - simde_float32x2_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_float32x2x2_t r = { { - simde_float32x2_from_private(r_[0]), - simde_float32x2_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_f32 - #define vld2_f32(a) simde_vld2_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1x2_t -simde_vld2_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld2_f64(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) - simde_float64x2_private a_ = simde_float64x2_to_private(simde_vld1q_f64(ptr)); - a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 1); - simde_float64x1x2_t r; - simde_memcpy(&r, &a_, sizeof(r)); - return r; - #else - simde_float64x1_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / 
sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_float64x1x2_t r = { { - simde_float64x1_from_private(r_[0]), - simde_float64x1_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld2_f64 - #define vld2_f64(a) simde_vld2_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16x2_t -simde_vld2q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2q_s8(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return - simde_vuzpq_s8( - simde_vld1q_s8(&(ptr[0])), - simde_vld1q_s8(&(ptr[16])) - ); - #else - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARCH_RISCV64) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ - #endif - simde_int8x16_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_int8x16x2_t r = { { - simde_int8x16_from_private(r_[0]), - simde_int8x16_from_private(r_[1]), - } }; - - return r; - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARCH_RISCV64) - HEDLEY_DIAGNOSTIC_POP - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2q_s8 - #define vld2q_s8(a) simde_vld2q_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4x2_t -simde_vld2q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2q_s32(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return - simde_vuzpq_s32( - simde_vld1q_s32(&(ptr[0])), - simde_vld1q_s32(&(ptr[4])) - ); - #else - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ - #endif - simde_int32x4_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_POP - #endif - - simde_int32x4x2_t r = { { - simde_int32x4_from_private(r_[0]), - simde_int32x4_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2q_s32 - #define vld2q_s32(a) simde_vld2q_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8x2_t -simde_vld2q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2q_s16(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return - simde_vuzpq_s16( - simde_vld1q_s16(&(ptr[0])), - simde_vld1q_s16(&(ptr[8])) - ); - #else - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARCH_RISCV64) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ - #endif - simde_int16x8_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_int16x8x2_t r = { { - simde_int16x8_from_private(r_[0]), - 
simde_int16x8_from_private(r_[1]), - } }; - - return r; - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARCH_RISCV64) - HEDLEY_DIAGNOSTIC_POP - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2q_s16 - #define vld2q_s16(a) simde_vld2q_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2x2_t -simde_vld2q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld2q_s64(ptr); - #else - simde_int64x2_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_int64x2x2_t r = { { - simde_int64x2_from_private(r_[0]), - simde_int64x2_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld2q_s64 - #define vld2q_s64(a) simde_vld2q_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16x2_t -simde_vld2q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2q_u8(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return - simde_vuzpq_u8( - simde_vld1q_u8(&(ptr[ 0])), - simde_vld1q_u8(&(ptr[16])) - ); - #else - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARCH_RISCV64) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ - #endif - simde_uint8x16_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_uint8x16x2_t r = { { - simde_uint8x16_from_private(r_[0]), - simde_uint8x16_from_private(r_[1]), - } }; - - return r; - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARCH_RISCV64) - HEDLEY_DIAGNOSTIC_POP - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2q_u8 - #define vld2q_u8(a) simde_vld2q_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8x2_t -simde_vld2q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2q_u16(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return - simde_vuzpq_u16( - simde_vld1q_u16(&(ptr[0])), - simde_vld1q_u16(&(ptr[8])) - ); - #else - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARCH_RISCV64) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ - #endif - simde_uint16x8_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_uint16x8x2_t r = { { - simde_uint16x8_from_private(r_[0]), - simde_uint16x8_from_private(r_[1]), - } }; - - return r; - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARCH_RISCV64) - HEDLEY_DIAGNOSTIC_POP - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2q_u16 - #define vld2q_u16(a) simde_vld2q_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4x2_t -simde_vld2q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { 
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2q_u32(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return - simde_vuzpq_u32( - simde_vld1q_u32(&(ptr[0])), - simde_vld1q_u32(&(ptr[4])) - ); - #else - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ - #endif - simde_uint32x4_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_POP - #endif - - simde_uint32x4x2_t r = { { - simde_uint32x4_from_private(r_[0]), - simde_uint32x4_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2q_u32 - #define vld2q_u32(a) simde_vld2q_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2x2_t -simde_vld2q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld2q_u64(ptr); - #else - simde_uint64x2_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_uint64x2x2_t r = { { - simde_uint64x2_from_private(r_[0]), - simde_uint64x2_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld2q_u64 - #define vld2q_u64(a) simde_vld2q_u64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8x2_t -simde_vld2q_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vld2q_f16(ptr); - #else - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ - #endif - simde_float16x8_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])); i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_POP - #endif - - simde_float16x8x2_t r = { { - simde_float16x8_from_private(r_[0]), - simde_float16x8_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2q_f16 - #define vld2q_f16(a) simde_vld2q_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4x2_t -simde_vld2q_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2q_f32(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return - simde_vuzpq_f32( - simde_vld1q_f32(&(ptr[0])), - simde_vld1q_f32(&(ptr[4])) - ); - #else - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ - #endif - simde_float32x4_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])); i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - #if 
defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_POP - #endif - - simde_float32x4x2_t r = { { - simde_float32x4_from_private(r_[0]), - simde_float32x4_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2q_f32 - #define vld2q_f32(a) simde_vld2q_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2x2_t -simde_vld2q_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld2q_f64(ptr); - #else - simde_float64x2_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_float64x2x2_t r = { { - simde_float64x2_from_private(r_[0]), - simde_float64x2_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld2q_f64 - #define vld2q_f64(a) simde_vld2q_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8x2_t -simde_vld2_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_p8(ptr); - #else - simde_poly8x8_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_poly8x8x2_t r = { { - simde_poly8x8_from_private(r_[0]), - simde_poly8x8_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_p8 - #define vld2_p8(a) simde_vld2_p8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4x2_t -simde_vld2_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_p16(ptr); - #else - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ - #endif - simde_poly16x4_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) - HEDLEY_DIAGNOSTIC_POP - #endif - - simde_poly16x4x2_t r = { { - simde_poly16x4_from_private(r_[0]), - simde_poly16x4_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_p16 - #define vld2_p16(a) simde_vld2_p16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1x2_t -simde_vld2_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld2_p64(ptr); - #else - simde_poly64x1_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_poly64x1x2_t r = { { - simde_poly64x1_from_private(r_[0]), - simde_poly64x1_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld2_p64 - #define vld2_p64(a) simde_vld2_p64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde_poly8x16x2_t -simde_vld2q_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2q_p8(ptr); - #else - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARCH_RISCV64) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ - #endif - simde_poly8x16_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_poly8x16x2_t r = { { - simde_poly8x16_from_private(r_[0]), - simde_poly8x16_from_private(r_[1]), - } }; - - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARCH_RISCV64) - HEDLEY_DIAGNOSTIC_POP - #endif - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2q_p8 - #define vld2q_p8(a) simde_vld2q_p8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8x2_t -simde_vld2q_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2q_p16(ptr); - #else - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARCH_RISCV64) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ - #endif - simde_poly16x8_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_poly16x8x2_t r = { { - simde_poly16x8_from_private(r_[0]), - simde_poly16x8_from_private(r_[1]), - } }; - #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARCH_RISCV64) - HEDLEY_DIAGNOSTIC_POP - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2q_p16 - #define vld2q_p16(a) simde_vld2q_p16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2x2_t -simde_vld2q_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld2q_p64(ptr); - #else - simde_poly64x2_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_poly64x2x2_t r = { { - simde_poly64x2_from_private(r_[0]), - simde_poly64x2_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld2q_p64 - #define vld2q_p64(a) simde_vld2q_p64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x4x2_t -simde_vld2_bf16(simde_bfloat16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld2_bf16(ptr); - #else - simde_bfloat16x4_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_bfloat16x4x2_t r = { { - simde_bfloat16x4_from_private(r_[0]), - simde_bfloat16x4_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld2_bf16 - #define vld2_bf16(a) 
simde_vld2_bf16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8x2_t -simde_vld2q_bf16(simde_bfloat16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld2q_bf16(ptr); - #else - simde_bfloat16x8_private r_[2]; - - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])); i++) { - for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { - r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; - } - } - - simde_bfloat16x8x2_t r = { { - simde_bfloat16x8_from_private(r_[0]), - simde_bfloat16x8_from_private(r_[1]), - } }; - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld2q_bf16 - #define vld2q_bf16(a) simde_vld2q_bf16((a)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_LD2_H) */ -/* :: End simde/arm/neon/ld2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ld2_dup.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_LD2_DUP_H) -#define SIMDE_ARM_NEON_LD2_DUP_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4x2_t -simde_vld2_dup_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vld2_dup_f16(ptr); - #else - simde_float16x4x2_t r; - - for (size_t i = 0 ; i < 2 ; i++) { - r.val[i] = simde_vdup_n_f16(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_dup_f16 - #define vld2_dup_f16(a) simde_vld2_dup_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2x2_t -simde_vld2_dup_f32(simde_float32 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_dup_f32(ptr); - #else - simde_float32x2x2_t r; - - for (size_t i = 0 ; i < 2 ; i++) { - r.val[i] = simde_vdup_n_f32(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_dup_f32 - #define vld2_dup_f32(a) simde_vld2_dup_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1x2_t -simde_vld2_dup_f64(simde_float64 const * ptr) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld2_dup_f64(ptr); - #else - simde_float64x1x2_t r; - - for (size_t i = 0 ; i < 2 ; i++) { - r.val[i] = simde_vdup_n_f64(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld2_dup_f64 - #define vld2_dup_f64(a) simde_vld2_dup_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8x2_t -simde_vld2_dup_s8(int8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_dup_s8(ptr); - #else - simde_int8x8x2_t r; - - for (size_t i = 0 ; i < 2 ; i++) { - r.val[i] = simde_vdup_n_s8(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_dup_s8 - #define vld2_dup_s8(a) simde_vld2_dup_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4x2_t -simde_vld2_dup_s16(int16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_dup_s16(ptr); - #else - simde_int16x4x2_t r; - - for (size_t i = 0 ; i < 2 ; i++) { - r.val[i] = simde_vdup_n_s16(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_dup_s16 - #define vld2_dup_s16(a) simde_vld2_dup_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2x2_t -simde_vld2_dup_s32(int32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_dup_s32(ptr); - #else - simde_int32x2x2_t r; - - for (size_t i = 0 ; i < 2 ; i++) { - r.val[i] = simde_vdup_n_s32(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_dup_s32 - #define vld2_dup_s32(a) simde_vld2_dup_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1x2_t -simde_vld2_dup_s64(int64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld2_dup_s64(ptr); - #else - simde_int64x1x2_t r; - - for (size_t i = 0 ; i < 2 ; i++) { - r.val[i] = simde_vdup_n_s64(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld2_dup_s64 - #define vld2_dup_s64(a) 
simde_vld2_dup_s64((a))
-#endif
-
-/* ... remaining auto-generated vld2_dup_* / vld2q_dup_* polyfills removed with this file ... */
-
-SIMDE_END_DECLS_
-HEDLEY_DIAGNOSTIC_POP
-
-#endif /* !defined(SIMDE_ARM_NEON_LD2_DUP_H) */
-/* :: End simde/arm/neon/ld2_dup.h :: */
-/* :: Begin simde/arm/neon/ld2_lane.h :: */
-/* ... MIT license header and auto-generated vld2_lane_* / vld2q_lane_* polyfills removed ... */
-/* :: End simde/arm/neon/ld2_lane.h :: */
-/* :: Begin simde/arm/neon/ld3.h :: */
-/* ... MIT license header and auto-generated vld3_* / vld3q_* polyfills removed ... */
-/* :: End simde/arm/neon/ld3.h :: */
-/* :: Begin simde/arm/neon/ld3_dup.h :: */
-/* ... MIT license header and auto-generated vld3_dup_* / vld3q_dup_* polyfills removed ... */
-/* :: End simde/arm/neon/ld3_dup.h :: */
-/*
AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ld3_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_LD3_LANE_H) -#define SIMDE_ARM_NEON_LD3_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8x3_t simde_vld3_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int8x8x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_int8x8x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_int8x8_private tmp_ = simde_int8x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int8x8_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld3_lane_s8(ptr, src, lane) vld3_lane_s8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_s8 - #define vld3_lane_s8(ptr, src, lane) simde_vld3_lane_s8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4x3_t simde_vld3_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int16x4x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int16x4x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_int16x4_private tmp_ = simde_int16x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int16x4_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld3_lane_s16(ptr, src, lane) vld3_lane_s16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_s16 - #define vld3_lane_s16(ptr, src, lane) simde_vld3_lane_s16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2x3_t simde_vld3_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int32x2x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int32x2x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_int32x2_private tmp_ = simde_int32x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int32x2_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld3_lane_s32(ptr, src, lane) 
vld3_lane_s32(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_s32 - #define vld3_lane_s32(ptr, src, lane) simde_vld3_lane_s32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1x3_t simde_vld3_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int64x1x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_int64x1x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_int64x1_private tmp_ = simde_int64x1_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int64x1_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld3_lane_s64(ptr, src, lane) vld3_lane_s64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_s64 - #define vld3_lane_s64(ptr, src, lane) simde_vld3_lane_s64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8x3_t simde_vld3_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint8x8x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_uint8x8x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_uint8x8_private tmp_ = simde_uint8x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint8x8_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld3_lane_u8(ptr, src, lane) vld3_lane_u8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_u8 - #define vld3_lane_u8(ptr, src, lane) simde_vld3_lane_u8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4x3_t simde_vld3_lane_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint16x4x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_uint16x4x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_uint16x4_private tmp_ = simde_uint16x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint16x4_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld3_lane_u16(ptr, src, lane) vld3_lane_u16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_u16 - #define vld3_lane_u16(ptr, src, lane) simde_vld3_lane_u16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2x3_t simde_vld3_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint32x2x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_uint32x2x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_uint32x2_private tmp_ = simde_uint32x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint32x2_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld3_lane_u32(ptr, src, lane) vld3_lane_u32(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_u32 - #define vld3_lane_u32(ptr, src, lane) simde_vld3_lane_u32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1x3_t simde_vld3_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint64x1x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_uint64x1x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_uint64x1_private tmp_ = simde_uint64x1_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint64x1_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) 
- #define simde_vld3_lane_u64(ptr, src, lane) vld3_lane_u64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_u64 - #define vld3_lane_u64(ptr, src, lane) simde_vld3_lane_u64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4x3_t simde_vld3_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_float16x4x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x4x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_float16x4_private tmp_ = simde_float16x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_float16x4_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vld3_lane_f16(ptr, src, lane) vld3_lane_f16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_f16 - #define vld3_lane_f16(ptr, src, lane) simde_vld3_lane_f16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2x3_t simde_vld3_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_float32x2x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_float32x2_private tmp_ = simde_float32x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_float32x2_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld3_lane_f32(ptr, src, lane) vld3_lane_f32(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_f32 - #define vld3_lane_f32(ptr, src, lane) simde_vld3_lane_f32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1x3_t simde_vld3_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_float64x1x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float64x1x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_float64x1_private tmp_ = simde_float64x1_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_float64x1_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld3_lane_f64(ptr, src, lane) vld3_lane_f64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_f64 - #define vld3_lane_f64(ptr, src, lane) simde_vld3_lane_f64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16x3_t simde_vld3q_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int8x16x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - simde_int8x16x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_int8x16_private tmp_ = simde_int8x16_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int8x16_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld3q_lane_s8(ptr, src, lane) vld3q_lane_s8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_s8 - #define vld3q_lane_s8(ptr, src, lane) simde_vld3q_lane_s8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8x3_t simde_vld3q_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int16x8x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_int16x8x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_int16x8_private tmp_ = simde_int16x8_to_private(src.val[i]); - 
tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int16x8_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld3q_lane_s16(ptr, src, lane) vld3q_lane_s16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_s16 - #define vld3q_lane_s16(ptr, src, lane) simde_vld3q_lane_s16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4x3_t simde_vld3q_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int32x4x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int32x4x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_int32x4_private tmp_ = simde_int32x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int32x4_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld3q_lane_s32(ptr, src, lane) vld3q_lane_s32(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_s32 - #define vld3q_lane_s32(ptr, src, lane) simde_vld3q_lane_s32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2x3_t simde_vld3q_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int64x2x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int64x2x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_int64x2_private tmp_ = simde_int64x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int64x2_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld3q_lane_s64(ptr, src, lane) vld3q_lane_s64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_s64 - #define vld3q_lane_s64(ptr, src, lane) simde_vld3q_lane_s64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16x3_t simde_vld3q_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint8x16x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - simde_uint8x16x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_uint8x16_private tmp_ = simde_uint8x16_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint8x16_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld3q_lane_u8(ptr, src, lane) vld3q_lane_u8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_u8 - #define vld3q_lane_u8(ptr, src, lane) simde_vld3q_lane_u8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8x3_t simde_vld3q_lane_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint16x8x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_uint16x8x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_uint16x8_private tmp_ = simde_uint16x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint16x8_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld3q_lane_u16(ptr, src, lane) vld3q_lane_u16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_u16 - #define vld3q_lane_u16(ptr, src, lane) simde_vld3q_lane_u16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4x3_t simde_vld3q_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint32x4x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_uint32x4x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) 
{ - simde_uint32x4_private tmp_ = simde_uint32x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint32x4_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld3q_lane_u32(ptr, src, lane) vld3q_lane_u32(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_u32 - #define vld3q_lane_u32(ptr, src, lane) simde_vld3q_lane_u32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2x3_t simde_vld3q_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint64x2x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_uint64x2x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_uint64x2_private tmp_ = simde_uint64x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint64x2_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld3q_lane_u64(ptr, src, lane) vld3q_lane_u64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_u64 - #define vld3q_lane_u64(ptr, src, lane) simde_vld3q_lane_u64((ptr), (src), (lane)) -#endif -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8x3_t simde_vld3q_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_float16x8x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float16x8x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_float16x8_private tmp_ = simde_float16x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_float16x8_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vld3q_lane_f16(ptr, src, lane) vld3q_lane_f16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_f16 - #define vld3q_lane_f16(ptr, src, lane) simde_vld3q_lane_f16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4x3_t simde_vld3q_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_float32x4x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_float32x4_private tmp_ = simde_float32x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_float32x4_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld3q_lane_f32(ptr, src, lane) vld3q_lane_f32(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_f32 - #define vld3q_lane_f32(ptr, src, lane) simde_vld3q_lane_f32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2x3_t simde_vld3q_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_float64x2x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float64x2x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_float64x2_private tmp_ = simde_float64x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_float64x2_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld3q_lane_f64(ptr, src, lane) vld3q_lane_f64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_f64 - #define vld3q_lane_f64(ptr, src, lane) simde_vld3q_lane_f64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8x3_t simde_vld3_lane_p8(simde_poly8_t const 
ptr[HEDLEY_ARRAY_PARAM(3)], simde_poly8x8x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_poly8x8x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_poly8x8_private tmp_ = simde_poly8x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_poly8x8_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld3_lane_p8(ptr, src, lane) vld3_lane_p8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_p8 - #define vld3_lane_p8(ptr, src, lane) simde_vld3_lane_p8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4x3_t simde_vld3_lane_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_poly16x4x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_poly16x4x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_poly16x4_private tmp_ = simde_poly16x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_poly16x4_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld3_lane_p16(ptr, src, lane) vld3_lane_p16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_p16 - #define vld3_lane_p16(ptr, src, lane) simde_vld3_lane_p16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1x3_t simde_vld3_lane_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_poly64x1x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_poly64x1x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_poly64x1_private tmp_ = simde_poly64x1_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_poly64x1_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld3_lane_p64(ptr, src, lane) vld3_lane_p64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_p64 - #define vld3_lane_p64(ptr, src, lane) simde_vld3_lane_p64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16x3_t simde_vld3q_lane_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_poly8x16x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - simde_poly8x16x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_poly8x16_private tmp_ = simde_poly8x16_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_poly8x16_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld3q_lane_p8(ptr, src, lane) vld3q_lane_p8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_p8 - #define vld3q_lane_p8(ptr, src, lane) simde_vld3q_lane_p8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8x3_t simde_vld3q_lane_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_poly16x8x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_poly16x8x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_poly16x8_private tmp_ = simde_poly16x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_poly16x8_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld3q_lane_p16(ptr, src, lane) vld3q_lane_p16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_p16 - #define vld3q_lane_p16(ptr, src, lane) simde_vld3q_lane_p16((ptr), (src), (lane)) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2x3_t simde_vld3q_lane_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_poly64x2x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_poly64x2x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_poly64x2_private tmp_ = simde_poly64x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_poly64x2_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld3q_lane_p64(ptr, src, lane) vld3q_lane_p64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_p64 - #define vld3q_lane_p64(ptr, src, lane) simde_vld3q_lane_p64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x4x3_t simde_vld3_lane_bf16(simde_bfloat16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_bfloat16x4x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_bfloat16x4x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_bfloat16x4_private tmp_ = simde_bfloat16x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_bfloat16x4_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vld3_lane_bf16(ptr, src, lane) vld3_lane_bf16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld3_lane_bf16 - #define vld3_lane_bf16(ptr, src, lane) simde_vld3_lane_bf16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8x3_t simde_vld3q_lane_bf16(simde_bfloat16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_bfloat16x8x3_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_bfloat16x8x3_t r; - - for (size_t i = 0 ; i < 3 ; i++) { - simde_bfloat16x8_private tmp_ = simde_bfloat16x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_bfloat16x8_from_private(tmp_); - } - return r; -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vld3q_lane_bf16(ptr, src, lane) vld3q_lane_bf16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld3q_lane_bf16 - #define vld3q_lane_bf16(ptr, src, lane) simde_vld3q_lane_bf16((ptr), (src), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_LD3_LANE_H) */ -/* :: End simde/arm/neon/ld3_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ld4.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_LD4_H) -#define SIMDE_ARM_NEON_LD4_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4x4_t -simde_vld4_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vld4_f16(ptr); - #else - simde_float16x4_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_float16x4_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_float16x4x4_t s_ = { { simde_float16x4_from_private(a_[0]), simde_float16x4_from_private(a_[1]), - simde_float16x4_from_private(a_[2]), simde_float16x4_from_private(a_[3]) } }; - return (s_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_f16 - #define vld4_f16(a) simde_vld4_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2x4_t -simde_vld4_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_f32(ptr); - #else - simde_float32x2_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_float32x2_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_float32x2x4_t s_ = { { simde_float32x2_from_private(a_[0]), simde_float32x2_from_private(a_[1]), - simde_float32x2_from_private(a_[2]), simde_float32x2_from_private(a_[3]) } }; - return (s_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_f32 - #define vld4_f32(a) simde_vld4_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1x4_t -simde_vld4_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld4_f64(ptr); - #else - simde_float64x1_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_float64x1_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_float64x1x4_t s_ = { { simde_float64x1_from_private(a_[0]), simde_float64x1_from_private(a_[1]), - simde_float64x1_from_private(a_[2]), simde_float64x1_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4_f64 - #define vld4_f64(a) simde_vld4_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8x4_t -simde_vld4_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_s8(ptr); - #else - simde_int8x8_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_int8x8_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_int8x8x4_t s_ = { { simde_int8x8_from_private(a_[0]), simde_int8x8_from_private(a_[1]), - simde_int8x8_from_private(a_[2]), simde_int8x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_s8 - #define vld4_s8(a) simde_vld4_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde_int16x4x4_t -simde_vld4_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_s16(ptr); - #else - simde_int16x4_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_int16x4_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_int16x4x4_t s_ = { { simde_int16x4_from_private(a_[0]), simde_int16x4_from_private(a_[1]), - simde_int16x4_from_private(a_[2]), simde_int16x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_s16 - #define vld4_s16(a) simde_vld4_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2x4_t -simde_vld4_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_s32(ptr); - #else - simde_int32x2_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_int32x2_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_int32x2x4_t s_ = { { simde_int32x2_from_private(a_[0]), simde_int32x2_from_private(a_[1]), - simde_int32x2_from_private(a_[2]), simde_int32x2_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_s32 - #define vld4_s32(a) simde_vld4_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1x4_t -simde_vld4_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_s64(ptr); - #else - simde_int64x1_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_int64x1_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_int64x1x4_t s_ = { { simde_int64x1_from_private(a_[0]), simde_int64x1_from_private(a_[1]), - simde_int64x1_from_private(a_[2]), simde_int64x1_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4_s64 - #define vld4_s64(a) simde_vld4_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8x4_t -simde_vld4_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_u8(ptr); - #else - simde_uint8x8_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_uint8x8_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_uint8x8x4_t s_ = { { simde_uint8x8_from_private(a_[0]), simde_uint8x8_from_private(a_[1]), - simde_uint8x8_from_private(a_[2]), simde_uint8x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_u8 - #define vld4_u8(a) simde_vld4_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4x4_t -simde_vld4_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_u16(ptr); - #else - simde_uint16x4_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_uint16x4_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_uint16x4x4_t s_ = { { simde_uint16x4_from_private(a_[0]), simde_uint16x4_from_private(a_[1]), - simde_uint16x4_from_private(a_[2]), simde_uint16x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_u16 - #define vld4_u16(a) simde_vld4_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2x4_t -simde_vld4_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_u32(ptr); - #else - simde_uint32x2_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_uint32x2_t) / sizeof(*ptr)) * 
4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_uint32x2x4_t s_ = { { simde_uint32x2_from_private(a_[0]), simde_uint32x2_from_private(a_[1]), - simde_uint32x2_from_private(a_[2]), simde_uint32x2_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_u32 - #define vld4_u32(a) simde_vld4_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1x4_t -simde_vld4_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_u64(ptr); - #else - simde_uint64x1_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_uint64x1_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_uint64x1x4_t s_ = { { simde_uint64x1_from_private(a_[0]), simde_uint64x1_from_private(a_[1]), - simde_uint64x1_from_private(a_[2]), simde_uint64x1_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4_u64 - #define vld4_u64(a) simde_vld4_u64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8x4_t -simde_vld4q_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vld4q_f16(ptr); - #else - simde_float16x8_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_float16x8_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_float16x8x4_t s_ = { { simde_float16x8_from_private(a_[0]), simde_float16x8_from_private(a_[1]), - simde_float16x8_from_private(a_[2]), simde_float16x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_f16 - #define vld4q_f16(a) simde_vld4q_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4x4_t -simde_vld4q_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4q_f32(ptr); - #else - simde_float32x4_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_float32x4_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_float32x4x4_t s_ = { { simde_float32x4_from_private(a_[0]), simde_float32x4_from_private(a_[1]), - simde_float32x4_from_private(a_[2]), simde_float32x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_f32 - #define vld4q_f32(a) simde_vld4q_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2x4_t -simde_vld4q_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld4q_f64(ptr); - #else - simde_float64x2_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_float64x2_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_float64x2x4_t s_ = { { simde_float64x2_from_private(a_[0]), simde_float64x2_from_private(a_[1]), - simde_float64x2_from_private(a_[2]), simde_float64x2_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_f64 - #define vld4q_f64(a) simde_vld4q_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16x4_t -simde_vld4q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4q_s8(ptr); - #else - simde_int8x16_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_int8x16_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_int8x16x4_t s_ = { { simde_int8x16_from_private(a_[0]), 
simde_int8x16_from_private(a_[1]), - simde_int8x16_from_private(a_[2]), simde_int8x16_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_s8 - #define vld4q_s8(a) simde_vld4q_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8x4_t -simde_vld4q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4q_s16(ptr); - #else - simde_int16x8_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_int16x8_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_int16x8x4_t s_ = { { simde_int16x8_from_private(a_[0]), simde_int16x8_from_private(a_[1]), - simde_int16x8_from_private(a_[2]), simde_int16x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_s16 - #define vld4q_s16(a) simde_vld4q_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4x4_t -simde_vld4q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4q_s32(ptr); - #else - simde_int32x4_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_int32x4_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_int32x4x4_t s_ = { { simde_int32x4_from_private(a_[0]), simde_int32x4_from_private(a_[1]), - simde_int32x4_from_private(a_[2]), simde_int32x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_s32 - #define vld4q_s32(a) simde_vld4q_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2x4_t -simde_vld4q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld4q_s64(ptr); - #else - simde_int64x2_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_int64x2_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_int64x2x4_t s_ = { { simde_int64x2_from_private(a_[0]), simde_int64x2_from_private(a_[1]), - simde_int64x2_from_private(a_[2]), simde_int64x2_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_s64 - #define vld4q_s64(a) simde_vld4q_s64((a)) -#endif -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16x4_t -simde_vld4q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4q_u8(ptr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - // Let a, b, c, d be the 4 uint8x16 to return, they are laid out in memory: - // [a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, a3, b3, c3, d3, - // a4, b4, c4, d4, a5, b5, c5, d5, a6, b6, c6, d6, a7, b7, c7, d7, - // a8, b8, c8, d8, a9, b9, c9, d9, a10, b10, c10, d10, a11, b11, c11, d11, - // a12, b12, c12, d12, a13, b13, c13, d13, a14, b14, c14, d14, a15, b15, c15, d15] - v128_t a_ = wasm_v128_load(&ptr[0]); - v128_t b_ = wasm_v128_load(&ptr[16]); - v128_t c_ = wasm_v128_load(&ptr[32]); - v128_t d_ = wasm_v128_load(&ptr[48]); - - v128_t a_low_b_low = wasm_i8x16_shuffle(a_, b_, 0, 4, 8, 12, 16, 20, 24, 28, - 1, 5, 9, 13, 17, 21, 25, 29); - v128_t a_high_b_high = wasm_i8x16_shuffle(c_, d_, 0, 4, 8, 12, 16, 20, 24, - 28, 1, 5, 9, 13, 17, 21, 25, 29); - v128_t a = wasm_i8x16_shuffle(a_low_b_low, a_high_b_high, 0, 1, 2, 3, 4, 5, - 6, 7, 16, 17, 18, 19, 20, 21, 22, 23); - v128_t b = wasm_i8x16_shuffle(a_low_b_low, a_high_b_high, 8, 9, 10, 11, 12, - 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31); - - v128_t c_low_d_low = wasm_i8x16_shuffle(a_, b_, 2, 6, 10, 14, 18, 22, 
26, - 30, 3, 7, 11, 15, 19, 23, 27, 31); - v128_t c_high_d_high = wasm_i8x16_shuffle(c_, d_, 2, 6, 10, 14, 18, 22, 26, - 30, 3, 7, 11, 15, 19, 23, 27, 31); - v128_t c = wasm_i8x16_shuffle(c_low_d_low, c_high_d_high, 0, 1, 2, 3, 4, 5, - 6, 7, 16, 17, 18, 19, 20, 21, 22, 23); - v128_t d = wasm_i8x16_shuffle(c_low_d_low, c_high_d_high, 8, 9, 10, 11, 12, - 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31); - - simde_uint8x16_private r_[4]; - r_[0].v128 = a; - r_[1].v128 = b; - r_[2].v128 = c; - r_[3].v128 = d; - simde_uint8x16x4_t s_ = {{simde_uint8x16_from_private(r_[0]), - simde_uint8x16_from_private(r_[1]), - simde_uint8x16_from_private(r_[2]), - simde_uint8x16_from_private(r_[3])}}; - return s_; - #else - simde_uint8x16_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_uint8x16_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_uint8x16x4_t s_ = { { simde_uint8x16_from_private(a_[0]), simde_uint8x16_from_private(a_[1]), - simde_uint8x16_from_private(a_[2]), simde_uint8x16_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_u8 - #define vld4q_u8(a) simde_vld4q_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8x4_t -simde_vld4q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4q_u16(ptr); - #else - simde_uint16x8_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_uint16x8_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_uint16x8x4_t s_ = { { simde_uint16x8_from_private(a_[0]), simde_uint16x8_from_private(a_[1]), - simde_uint16x8_from_private(a_[2]), simde_uint16x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_u16 - #define vld4q_u16(a) simde_vld4q_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4x4_t -simde_vld4q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4q_u32(ptr); - #else - simde_uint32x4_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_uint32x4_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_uint32x4x4_t s_ = { { simde_uint32x4_from_private(a_[0]), simde_uint32x4_from_private(a_[1]), - simde_uint32x4_from_private(a_[2]), simde_uint32x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_u32 - #define vld4q_u32(a) simde_vld4q_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2x4_t -simde_vld4q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld4q_u64(ptr); - #else - simde_uint64x2_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_uint64x2_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_uint64x2x4_t s_ = { { simde_uint64x2_from_private(a_[0]), simde_uint64x2_from_private(a_[1]), - simde_uint64x2_from_private(a_[2]), simde_uint64x2_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_u64 - #define vld4q_u64(a) simde_vld4q_u64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8x4_t -simde_vld4_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_p8(ptr); - #else - simde_poly8x8_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_poly8x8_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - 
simde_poly8x8x4_t s_ = { { simde_poly8x8_from_private(a_[0]), simde_poly8x8_from_private(a_[1]), - simde_poly8x8_from_private(a_[2]), simde_poly8x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_p8 - #define vld4_p8(a) simde_vld4_p8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4x4_t -simde_vld4_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_p16(ptr); - #else - simde_poly16x4_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_poly16x4_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_poly16x4x4_t s_ = { { simde_poly16x4_from_private(a_[0]), simde_poly16x4_from_private(a_[1]), - simde_poly16x4_from_private(a_[2]), simde_poly16x4_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_p16 - #define vld4_p16(a) simde_vld4_p16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1x4_t -simde_vld4_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld4_p64(ptr); - #else - simde_poly64x1_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_poly64x1_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_poly64x1x4_t s_ = { { simde_poly64x1_from_private(a_[0]), simde_poly64x1_from_private(a_[1]), - simde_poly64x1_from_private(a_[2]), simde_poly64x1_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4_p64 - #define vld4_p64(a) simde_vld4_p64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16x4_t -simde_vld4q_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4q_p8(ptr); - #else - simde_poly8x16_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_poly8x16_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_poly8x16x4_t s_ = { { simde_poly8x16_from_private(a_[0]), simde_poly8x16_from_private(a_[1]), - simde_poly8x16_from_private(a_[2]), simde_poly8x16_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_p8 - #define vld4q_p8(a) simde_vld4q_p8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8x4_t -simde_vld4q_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4q_p16(ptr); - #else - simde_poly16x8_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_poly16x8_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_poly16x8x4_t s_ = { { simde_poly16x8_from_private(a_[0]), simde_poly16x8_from_private(a_[1]), - simde_poly16x8_from_private(a_[2]), simde_poly16x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_p16 - #define vld4q_p16(a) simde_vld4q_p16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2x4_t -simde_vld4q_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld4q_p64(ptr); - #else - simde_poly64x2_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_poly64x2_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_poly64x2x4_t s_ = { { simde_poly64x2_from_private(a_[0]), simde_poly64x2_from_private(a_[1]), - simde_poly64x2_from_private(a_[2]), simde_poly64x2_from_private(a_[3]) } }; - return 
s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_p64 - #define vld4q_p64(a) simde_vld4q_p64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x4x4_t -simde_vld4_bf16(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld4_bf16(ptr); - #else - simde_bfloat16x4_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_bfloat16x4_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_bfloat16x4x4_t s_ = { { simde_bfloat16x4_from_private(a_[0]), simde_bfloat16x4_from_private(a_[1]), - simde_bfloat16x4_from_private(a_[2]), simde_bfloat16x4_from_private(a_[3]) } }; - return (s_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4_bf16 - #define vld4_bf16(a) simde_vld4_bf16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8x4_t -simde_vld4q_bf16(simde_bfloat16 const ptr[HEDLEY_ARRAY_PARAM(32)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld4q_bf16(ptr); - #else - simde_bfloat16x8_private a_[4]; - for (size_t i = 0; i < (sizeof(simde_bfloat16x8_t) / sizeof(*ptr)) * 4 ; i++) { - a_[i % 4].values[i / 4] = ptr[i]; - } - simde_bfloat16x8x4_t s_ = { { simde_bfloat16x8_from_private(a_[0]), simde_bfloat16x8_from_private(a_[1]), - simde_bfloat16x8_from_private(a_[2]), simde_bfloat16x8_from_private(a_[3]) } }; - return s_; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_bf16 - #define vld4q_bf16(a) simde_vld4q_bf16((a)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_LD4_H) */ -/* :: End simde/arm/neon/ld4.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ld4_dup.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_LD4_DUP_H) -#define SIMDE_ARM_NEON_LD4_DUP_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4x4_t -simde_vld4_dup_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vld4_dup_f16(ptr); - #else - simde_float16x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_f16(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_f16 - #define vld4_dup_f16(a) simde_vld4_dup_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2x4_t -simde_vld4_dup_f32(simde_float32 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_dup_f32(ptr); - #else - simde_float32x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_f32(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_f32 - #define vld4_dup_f32(a) simde_vld4_dup_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1x4_t -simde_vld4_dup_f64(simde_float64 const * ptr) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld4_dup_f64(ptr); - #else - simde_float64x1x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_f64(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_f64 - #define vld4_dup_f64(a) simde_vld4_dup_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8x4_t -simde_vld4_dup_s8(int8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_dup_s8(ptr); - #else - simde_int8x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_s8(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_s8 - #define vld4_dup_s8(a) simde_vld4_dup_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4x4_t -simde_vld4_dup_s16(int16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_dup_s16(ptr); - #else - simde_int16x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_s16(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_s16 - #define vld4_dup_s16(a) simde_vld4_dup_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2x4_t -simde_vld4_dup_s32(int32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_dup_s32(ptr); - #else - simde_int32x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_s32(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_s32 - #define vld4_dup_s32(a) simde_vld4_dup_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1x4_t -simde_vld4_dup_s64(int64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_dup_s64(ptr); - #else - simde_int64x1x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_s64(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_s64 - #define vld4_dup_s64(a) 
simde_vld4_dup_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8x4_t -simde_vld4_dup_u8(uint8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_dup_u8(ptr); - #else - simde_uint8x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_u8(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_u8 - #define vld4_dup_u8(a) simde_vld4_dup_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4x4_t -simde_vld4_dup_u16(uint16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_dup_u16(ptr); - #else - simde_uint16x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_u16(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_u16 - #define vld4_dup_u16(a) simde_vld4_dup_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2x4_t -simde_vld4_dup_u32(uint32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_dup_u32(ptr); - #else - simde_uint32x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_u32(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_u32 - #define vld4_dup_u32(a) simde_vld4_dup_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1x4_t -simde_vld4_dup_u64(uint64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld4_dup_u64(ptr); - #else - simde_uint64x1x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_u64(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_u64 - #define vld4_dup_u64(a) simde_vld4_dup_u64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8x4_t -simde_vld4q_dup_f16(simde_float16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vld4q_dup_f16(ptr); - #else - simde_float16x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_f16(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_f16 - #define vld4q_dup_f16(a) simde_vld4q_dup_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4x4_t -simde_vld4q_dup_f32(simde_float32 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld4q_dup_f32(ptr); - #else - simde_float32x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_f32(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_f32 - #define vld4q_dup_f32(a) simde_vld4q_dup_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2x4_t -simde_vld4q_dup_f64(simde_float64 const * ptr) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld4q_dup_f64(ptr); - #else - simde_float64x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_f64(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_f64 - #define vld4q_dup_f64(a) simde_vld4q_dup_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16x4_t -simde_vld4q_dup_s8(int8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld4q_dup_s8(ptr); - #else - simde_int8x16x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_s8(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_s8 - 
#define vld4q_dup_s8(a) simde_vld4q_dup_s8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8x4_t -simde_vld4q_dup_s16(int16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld4q_dup_s16(ptr); - #else - simde_int16x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_s16(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_s16 - #define vld4q_dup_s16(a) simde_vld4q_dup_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4x4_t -simde_vld4q_dup_s32(int32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld4q_dup_s32(ptr); - #else - simde_int32x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_s32(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_s32 - #define vld4q_dup_s32(a) simde_vld4q_dup_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2x4_t -simde_vld4q_dup_s64(int64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld4q_dup_s64(ptr); - #else - simde_int64x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_s64(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_s64 - #define vld4q_dup_s64(a) simde_vld4q_dup_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16x4_t -simde_vld4q_dup_u8(uint8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld4q_dup_u8(ptr); - #else - simde_uint8x16x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_u8(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_u8 - #define vld4q_dup_u8(a) simde_vld4q_dup_u8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8x4_t -simde_vld4q_dup_u16(uint16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld4q_dup_u16(ptr); - #else - simde_uint16x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_u16(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_u16 - #define vld4q_dup_u16(a) simde_vld4q_dup_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4x4_t -simde_vld4q_dup_u32(uint32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld4q_dup_u32(ptr); - #else - simde_uint32x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_u32(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_u32 - #define vld4q_dup_u32(a) simde_vld4q_dup_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2x4_t -simde_vld4q_dup_u64(uint64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld4q_dup_u64(ptr); - #else - simde_uint64x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_u64(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_u64 - #define vld4q_dup_u64(a) simde_vld4q_dup_u64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8x4_t -simde_vld4_dup_p8(simde_poly8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - return vld4_dup_p8(ptr); - #else - simde_poly8x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_p8(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - 
#undef vld4_dup_p8 - #define vld4_dup_p8(a) simde_vld4_dup_p8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4x4_t -simde_vld4_dup_p16(simde_poly16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - return vld4_dup_p16(ptr); - #else - simde_poly16x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_p16(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_p16 - #define vld4_dup_p16(a) simde_vld4_dup_p16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1x4_t -simde_vld4_dup_p64(simde_poly64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld4_dup_p64(ptr); - #else - simde_poly64x1x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_p64(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_p64 - #define vld4_dup_p64(a) simde_vld4_dup_p64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16x4_t -simde_vld4q_dup_p8(simde_poly8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - return vld4q_dup_p8(ptr); - #else - simde_poly8x16x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_p8(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_p8 - #define vld4q_dup_p8(a) simde_vld4q_dup_p8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8x4_t -simde_vld4q_dup_p16(simde_poly16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - return vld4q_dup_p16(ptr); - #else - simde_poly16x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_p16(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_p16 - #define vld4q_dup_p16(a) simde_vld4q_dup_p16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2x4_t -simde_vld4q_dup_p64(simde_poly64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vld4q_dup_p64(ptr); - #else - simde_poly64x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_p64(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_p64 - #define vld4q_dup_p64(a) simde_vld4q_dup_p64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x4x4_t -simde_vld4_dup_bf16(simde_bfloat16_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld4_dup_bf16(ptr); - #else - simde_bfloat16x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdup_n_bf16(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4_dup_bf16 - #define vld4_dup_bf16(a) simde_vld4_dup_bf16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8x4_t -simde_vld4q_dup_bf16(simde_bfloat16 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - return vld4q_dup_bf16(ptr); - #else - simde_bfloat16x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - r.val[i] = simde_vdupq_n_bf16(ptr[i]); - } - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_dup_bf16 - #define vld4q_dup_bf16(a) simde_vld4q_dup_bf16((a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_LD3_DUP_H) */ -/* :: End simde/arm/neon/ld4_dup.h :: */ -/* 
AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/ld4_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2021 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -/* In older versions of clang, __builtin_neon_vld4_lane_v would - * generate a diagnostic for most variants (those which didn't - * use signed 8-bit integers). I believe this was fixed by - * 78ad22e0cc6390fcd44b2b7b5132f1b960ff975d. - * - * Since we have to use macros (due to the immediate-mode parameter) - * we can't just disable it once in this file; we have to use statement - * exprs and push / pop the stack for each macro. 
*/ - -#if !defined(SIMDE_ARM_NEON_LD4_LANE_H) -#define SIMDE_ARM_NEON_LD4_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8x4_t -simde_vld4_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int8x8x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_int8x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_int8x8_private tmp_ = simde_int8x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int8x8_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4_lane_s8(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s8(ptr, src, lane)) - #else - #define simde_vld4_lane_s8(ptr, src, lane) vld4_lane_s8(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_s8 - #define vld4_lane_s8(ptr, src, lane) simde_vld4_lane_s8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4x4_t -simde_vld4_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x4x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int16x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_int16x4_private tmp_ = simde_int16x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int16x4_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4_lane_s16(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s16(ptr, src, lane)) - #else - #define simde_vld4_lane_s16(ptr, src, lane) vld4_lane_s16(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_s16 - #define vld4_lane_s16(ptr, src, lane) simde_vld4_lane_s16((ptr), (src), (lane)) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2x4_t -simde_vld4_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x2x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int32x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_int32x2_private tmp_ = simde_int32x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int32x2_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4_lane_s32(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s32(ptr, src, lane)) - #else - #define simde_vld4_lane_s32(ptr, src, lane) vld4_lane_s32(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_s32 - #define vld4_lane_s32(ptr, src, lane) simde_vld4_lane_s32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1x4_t -simde_vld4_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x1x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_int64x1x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_int64x1_private tmp_ = simde_int64x1_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = 
simde_int64x1_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4_lane_s64(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s64(ptr, src, lane)) - #else - #define simde_vld4_lane_s64(ptr, src, lane) vld4_lane_s64(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_s64 - #define vld4_lane_s64(ptr, src, lane) simde_vld4_lane_s64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8x4_t -simde_vld4_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint8x8x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_uint8x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_uint8x8_private tmp_ = simde_uint8x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint8x8_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4_lane_u8(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u8(ptr, src, lane)) - #else - #define simde_vld4_lane_u8(ptr, src, lane) vld4_lane_u8(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_u8 - #define vld4_lane_u8(ptr, src, lane) simde_vld4_lane_u8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4x4_t -simde_vld4_lane_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint16x4x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_uint16x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_uint16x4_private tmp_ = simde_uint16x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint16x4_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4_lane_u16(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u16(ptr, src, lane)) - #else - #define simde_vld4_lane_u16(ptr, src, lane) vld4_lane_u16(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_u16 - #define vld4_lane_u16(ptr, src, lane) simde_vld4_lane_u16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2x4_t -simde_vld4_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x2x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_uint32x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_uint32x2_private tmp_ = simde_uint32x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint32x2_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4_lane_u32(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u32(ptr, src, lane)) - #else - #define simde_vld4_lane_u32(ptr, src, lane) vld4_lane_u32(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_u32 - #define vld4_lane_u32(ptr, src, lane) simde_vld4_lane_u32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1x4_t 
-simde_vld4_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x1x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_uint64x1x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_uint64x1_private tmp_ = simde_uint64x1_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint64x1_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4_lane_u64(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u64(ptr, src, lane)) - #else - #define simde_vld4_lane_u64(ptr, src, lane) vld4_lane_u64(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_u64 - #define vld4_lane_u64(ptr, src, lane) simde_vld4_lane_u64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4x4_t -simde_vld4_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float16x4x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_float16x4_private tmp_ = simde_float16x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_float16x4_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4_lane_f16(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_f16(ptr, src, lane)) - #else - #define simde_vld4_lane_f16(ptr, src, lane) vld4_lane_f16(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_f16 - #define vld4_lane_f16(ptr, src, lane) simde_vld4_lane_f16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2x4_t -simde_vld4_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x2x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_float32x2_private tmp_ = simde_float32x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_float32x2_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4_lane_f32(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_f32(ptr, src, lane)) - #else - #define simde_vld4_lane_f32(ptr, src, lane) vld4_lane_f32(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_f32 - #define vld4_lane_f32(ptr, src, lane) simde_vld4_lane_f32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1x4_t -simde_vld4_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x1x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float64x1x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_float64x1_private tmp_ = simde_float64x1_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_float64x1_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4_lane_f64(ptr, src, lane) \ - 
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_f64(ptr, src, lane)) - #else - #define simde_vld4_lane_f64(ptr, src, lane) vld4_lane_f64(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_f64 - #define vld4_lane_f64(ptr, src, lane) simde_vld4_lane_f64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16x4_t -simde_vld4q_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int8x16x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - simde_int8x16x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_int8x16_private tmp_ = simde_int8x16_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int8x16_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4q_lane_s8(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s8(ptr, src, lane)) - #else - #define simde_vld4q_lane_s8(ptr, src, lane) vld4q_lane_s8(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_s8 - #define vld4q_lane_s8(ptr, src, lane) simde_vld4q_lane_s8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8x4_t -simde_vld4q_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x8x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_int16x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_int16x8_private tmp_ = simde_int16x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int16x8_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4q_lane_s16(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s16(ptr, src, lane)) - #else - #define simde_vld4q_lane_s16(ptr, src, lane) vld4q_lane_s16(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_s16 - #define vld4q_lane_s16(ptr, src, lane) simde_vld4q_lane_s16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4x4_t -simde_vld4q_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x4x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int32x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_int32x4_private tmp_ = simde_int32x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int32x4_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4q_lane_s32(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s32(ptr, src, lane)) - #else - #define simde_vld4q_lane_s32(ptr, src, lane) vld4q_lane_s32(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_s32 - #define vld4q_lane_s32(ptr, src, lane) simde_vld4q_lane_s32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2x4_t -simde_vld4q_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x2x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int64x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - 
simde_int64x2_private tmp_ = simde_int64x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_int64x2_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4q_lane_s64(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s64(ptr, src, lane)) - #else - #define simde_vld4q_lane_s64(ptr, src, lane) vld4q_lane_s64(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_s64 - #define vld4q_lane_s64(ptr, src, lane) simde_vld4q_lane_s64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16x4_t -simde_vld4q_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint8x16x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - simde_uint8x16x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_uint8x16_private tmp_ = simde_uint8x16_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint8x16_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4q_lane_u8(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u8(ptr, src, lane)) - #else - #define simde_vld4q_lane_u8(ptr, src, lane) vld4q_lane_u8(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_u8 - #define vld4q_lane_u8(ptr, src, lane) simde_vld4q_lane_u8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8x4_t -simde_vld4q_lane_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint16x8x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_uint16x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_uint16x8_private tmp_ = simde_uint16x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint16x8_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4q_lane_u16(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u16(ptr, src, lane)) - #else - #define simde_vld4q_lane_u16(ptr, src, lane) vld4q_lane_u16(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_u16 - #define vld4q_lane_u16(ptr, src, lane) simde_vld4q_lane_u16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4x4_t -simde_vld4q_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x4x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_uint32x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_uint32x4_private tmp_ = simde_uint32x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint32x4_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4q_lane_u32(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u32(ptr, src, lane)) - #else - #define simde_vld4q_lane_u32(ptr, src, lane) vld4q_lane_u32(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_u32 - 
#define vld4q_lane_u32(ptr, src, lane) simde_vld4q_lane_u32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2x4_t -simde_vld4q_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x2x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_uint64x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_uint64x2_private tmp_ = simde_uint64x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_uint64x2_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4q_lane_u64(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u64(ptr, src, lane)) - #else - #define simde_vld4q_lane_u64(ptr, src, lane) vld4q_lane_u64(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_u64 - #define vld4q_lane_u64(ptr, src, lane) simde_vld4q_lane_u64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8x4_t -simde_vld4q_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float16x8x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float16x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_float16x8_private tmp_ = simde_float16x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_float16x8_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4q_lane_f16(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_f16(ptr, src, lane)) - #else - #define simde_vld4q_lane_f16(ptr, src, lane) vld4q_lane_f16(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_f16 - #define vld4q_lane_f16(ptr, src, lane) simde_vld4q_lane_f16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4x4_t -simde_vld4q_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x4x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_float32x4_private tmp_ = simde_float32x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_float32x4_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4q_lane_f32(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_f32(ptr, src, lane)) - #else - #define simde_vld4q_lane_f32(ptr, src, lane) vld4q_lane_f32(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_f32 - #define vld4q_lane_f32(ptr, src, lane) simde_vld4q_lane_f32((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2x4_t -simde_vld4q_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x2x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float64x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_float64x2_private tmp_ = simde_float64x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_float64x2_from_private(tmp_); - } - - return r; -} -#if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) - #define simde_vld4q_lane_f64(ptr, src, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_f64(ptr, src, lane)) - #else - #define simde_vld4q_lane_f64(ptr, src, lane) vld4q_lane_f64(ptr, src, lane) - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_f64 - #define vld4q_lane_f64(ptr, src, lane) simde_vld4q_lane_f64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8x4_t -simde_vld4_lane_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_poly8x8x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_poly8x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_poly8x8_private tmp_ = simde_poly8x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_poly8x8_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld4_lane_p8(ptr, src, lane) vld4_lane_p8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_p8 - #define vld4_lane_p8(ptr, src, lane) simde_vld4_lane_p8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4x4_t -simde_vld4_lane_p16(simde_poly16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_poly16x4x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_poly16x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_poly16x4_private tmp_ = simde_poly16x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_poly16x4_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld4_lane_p16(ptr, src, lane) vld4_lane_p16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_p16 - #define vld4_lane_p16(ptr, src, lane) simde_vld4_lane_p16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x1x4_t -simde_vld4_lane_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_poly64x1x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_poly64x1x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_poly64x1_private tmp_ = simde_poly64x1_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_poly64x1_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld4_lane_p64(ptr, src, lane) vld4_lane_p64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_p64 - #define vld4_lane_p64(ptr, src, lane) simde_vld4_lane_p64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16x4_t -simde_vld4q_lane_p8(simde_poly8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_poly8x16x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - simde_poly8x16x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_poly8x16_private tmp_ = simde_poly8x16_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_poly8x16_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld4q_lane_p8(ptr, src, lane) vld4q_lane_p8(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_p8 - #define vld4q_lane_p8(ptr, src, lane) simde_vld4q_lane_p8((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8x4_t -simde_vld4q_lane_p16(simde_poly16_t const 
ptr[HEDLEY_ARRAY_PARAM(4)], simde_poly16x8x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_poly16x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_poly16x8_private tmp_ = simde_poly16x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_poly16x8_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vld4q_lane_p16(ptr, src, lane) vld4q_lane_p16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_p16 - #define vld4q_lane_p16(ptr, src, lane) simde_vld4q_lane_p16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly64x2x4_t -simde_vld4q_lane_p64(simde_poly64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_poly64x2x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_poly64x2x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_poly64x2_private tmp_ = simde_poly64x2_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_poly64x2_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vld4q_lane_p64(ptr, src, lane) vld4q_lane_p64(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_p64 - #define vld4q_lane_p64(ptr, src, lane) simde_vld4q_lane_p64((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x4x4_t -simde_vld4_lane_bf16(simde_bfloat16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_bfloat16x4x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_bfloat16x4x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_bfloat16x4_private tmp_ = simde_bfloat16x4_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_bfloat16x4_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vld4_lane_bf16(ptr, src, lane) vld4_lane_bf16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4_lane_bf16 - #define vld4_lane_bf16(ptr, src, lane) simde_vld4_lane_bf16((ptr), (src), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_bfloat16x8x4_t -simde_vld4q_lane_bf16(simde_bfloat16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_bfloat16x8x4_t src, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_bfloat16x8x4_t r; - - for (size_t i = 0 ; i < 4 ; i++) { - simde_bfloat16x8_private tmp_ = simde_bfloat16x8_to_private(src.val[i]); - tmp_.values[lane] = ptr[i]; - r.val[i] = simde_bfloat16x8_from_private(tmp_); - } - - return r; -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - #define simde_vld4q_lane_bf16(ptr, src, lane) vld4q_lane_bf16(ptr, src, lane) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vld4q_lane_bf16 - #define vld4q_lane_bf16(ptr, src, lane) simde_vld4q_lane_bf16((ptr), (src), (lane)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_LD4_LANE_H) */ -/* :: End simde/arm/neon/ld4_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/max.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without 
limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_MAX_H) -#define SIMDE_ARM_NEON_MAX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vmaxh_f16(simde_float16_t a, simde_float16_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vmaxh_f16(a, b); - #else - simde_float32_t r_; - simde_float32_t a_ = simde_float16_to_float32(a); - simde_float32_t b_ = simde_float16_to_float32(b); - #if !defined(SIMDE_FAST_NANS) - r_ = (a_ >= b_) ? a_ : ((a_ < b_) ? b_ : SIMDE_MATH_NANF); - #else - r_ = (a_ > b_) ? a_ : b_; - #endif - return simde_float16_from_float32(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vmaxh_f16 - #define vmaxh_f16(a, b) simde_vmaxh_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vmax_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vmax_f16(a, b); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vmaxh_f16(a_.values[i], b_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vmax_f16 - #define vmax_f16(a, b) simde_vmax_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vmax_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmax_f32(a, b); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if !defined(SIMDE_FAST_NANS) - r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NANF); - #else - r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; - #endif - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmax_f32 - #define vmax_f32(a, b) simde_vmax_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vmax_f64(simde_float64x1_t a, simde_float64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmax_f64(a, b); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a), - b_ = simde_float64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if !defined(SIMDE_FAST_NANS) - r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NAN); - #else - r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; - #endif - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmax_f64 - #define vmax_f64(a, b) simde_vmax_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vmax_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmax_s8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_s8(simde_vcgt_s8(a, b), a, b); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; - } - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmax_s8 - #define vmax_s8(a, b) simde_vmax_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vmax_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmax_s16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_s16(simde_vcgt_s16(a, b), a, b); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; - } - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmax_s16 - #define vmax_s16(a, b) simde_vmax_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vmax_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmax_s32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_s32(simde_vcgt_s32(a, b), a, b); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; - } - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmax_s32 - #define vmax_s32(a, b) simde_vmax_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_x_vmax_s64(simde_int64x1_t a, simde_int64x1_t b) { - #if SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_s64(simde_vcgt_s64(a, b), a, b); - #else - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; - } - - return simde_int64x1_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vmax_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmax_u8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_u8(simde_vcgt_u8(a, b), a, b); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; - } - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmax_u8 - #define vmax_u8(a, b) simde_vmax_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vmax_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmax_u16(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && !defined(SIMDE_X86_SSE2_NATIVE) - return simde_vbsl_u16(simde_vcgt_u16(a, b), a, b); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - r_.m64 = _mm_add_pi16(b_.m64, _mm_subs_pu16(a_.m64, b_.m64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmax_u16 - #define vmax_u16(a, b) simde_vmax_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vmax_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmax_u32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_u32(simde_vcgt_u32(a, b), a, b); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; - } - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmax_u32 - #define vmax_u32(a, b) simde_vmax_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_x_vmax_u64(simde_uint64x1_t a, simde_uint64x1_t b) { - #if SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_u64(simde_vcgt_u64(a, b), a, b); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; - } - - return simde_uint64x1_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vmaxq_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vmaxq_f16(a, b); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vmaxh_f16(a_.values[i], b_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vmaxq_f16 - #define vmaxq_f16(a, b) simde_vmaxq_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vmaxq_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmaxq_f32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return - vec_sel( - b, - a, - vec_orc( - vec_cmpgt(a, b), - vec_cmpeq(a, a) - ) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) cmpres = vec_cmpeq(a, a); - return - vec_sel( - b, - a, - vec_or( - vec_cmpgt(a, b), - vec_nor(cmpres, cmpres) - ) - ); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_FAST_NANS) - r_.m128 = _mm_max_ps(a_.m128, b_.m128); - #elif defined(SIMDE_X86_SSE_NATIVE) - __m128 m = _mm_or_ps(_mm_cmpneq_ps(a_.m128, a_.m128), _mm_cmpgt_ps(a_.m128, b_.m128)); - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128 = _mm_blendv_ps(b_.m128, a_.m128, m); - #else - r_.m128 = - _mm_or_ps( - _mm_and_ps(m, a_.m128), - _mm_andnot_ps(m, b_.m128) - ); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_f32x4_max(a_.v128, b_.v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if !defined(SIMDE_FAST_NANS) - r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NANF); - #else - r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; - #endif - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmaxq_f32 - #define vmaxq_f32(a, b) simde_vmaxq_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vmaxq_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmaxq_f64(a, b); - #elif (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && defined(SIMDE_FAST_NANS) - return vec_max(a, b); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_FAST_NANS) - r_.m128d = _mm_max_pd(a_.m128d, b_.m128d); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128d m = _mm_or_pd(_mm_cmpneq_pd(a_.m128d, a_.m128d), _mm_cmpgt_pd(a_.m128d, b_.m128d)); - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128d = _mm_blendv_pd(b_.m128d, a_.m128d, m); - #else - r_.m128d = - _mm_or_pd( - _mm_and_pd(m, a_.m128d), - _mm_andnot_pd(m, b_.m128d) - ); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_f64x2_max(a_.v128, b_.v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if !defined(SIMDE_FAST_NANS) - r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NAN); - #else - r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; - #endif - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxq_f64 - #define vmaxq_f64(a, b) simde_vmaxq_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vmaxq_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmaxq_s8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b); - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = _mm_max_epi8(a_.m128i, b_.m128i); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(a_.m128i, b_.m128i); - r_.m128i = _mm_or_si128(_mm_and_si128(m, a_.m128i), _mm_andnot_si128(m, b_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_max(a_.v128, b_.v128); - #endif - - return simde_int8x16_from_private(r_); - #else - return simde_vbslq_s8(simde_vcgtq_s8(a, b), a, b); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmaxq_s8 - #define vmaxq_s8(a, b) simde_vmaxq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vmaxq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmaxq_s16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b); - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_max_epi16(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_max(a_.v128, b_.v128); - #endif - - return 
simde_int16x8_from_private(r_); - #else - return simde_vbslq_s16(simde_vcgtq_s16(a, b), a, b); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmaxq_s16 - #define vmaxq_s16(a, b) simde_vmaxq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vmaxq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmaxq_s32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b); - #elif \ - defined(SIMDE_X86_SSE4_1_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = _mm_max_epi32(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_max(a_.v128, b_.v128); - #endif - - return simde_int32x4_from_private(r_); - #else - return simde_vbslq_s32(simde_vcgtq_s32(a, b), a, b); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmaxq_s32 - #define vmaxq_s32(a, b) simde_vmaxq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_x_vmaxq_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b); - #else - return simde_vbslq_s64(simde_vcgtq_s64(a, b), a, b); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vmaxq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmaxq_u8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b); - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_max_epu8(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u8x16_max(a_.v128, b_.v128); - #endif - - return simde_uint8x16_from_private(r_); - #else - return simde_vbslq_u8(simde_vcgtq_u8(a, b), a, b); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmaxq_u8 - #define vmaxq_u8(a, b) simde_vmaxq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vmaxq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmaxq_u16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b); - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = _mm_max_epu16(a_.m128i, b_.m128i); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - r_.m128i = _mm_add_epi16(b_.m128i, _mm_subs_epu16(a_.m128i, b_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u16x8_max(a_.v128, b_.v128); - #endif - - return simde_uint16x8_from_private(r_); - #else - return simde_vbslq_u16(simde_vcgtq_u16(a, b), a, b); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmaxq_u16 - #define vmaxq_u16(a, b) simde_vmaxq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t 
-simde_vmaxq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmaxq_u32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b); - #elif \ - defined(SIMDE_X86_SSE4_1_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = _mm_max_epu32(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u32x4_max(a_.v128, b_.v128); - #endif - - return simde_uint32x4_from_private(r_); - #else - return simde_vbslq_u32(simde_vcgtq_u32(a, b), a, b); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmaxq_u32 - #define vmaxq_u32(a, b) simde_vmaxq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_x_vmaxq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_max(a, b); - #else - return simde_vbslq_u64(simde_vcgtq_u64(a, b), a, b); - #endif -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MAX_H) */ -/* :: End simde/arm/neon/max.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/maxnm.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_MAXNM_H) -#define SIMDE_ARM_NEON_MAXNM_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vmaxnmh_f16(simde_float16_t a, simde_float16_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6) && defined(SIMDE_ARM_NEON_FP16) - return vmaxnmh_f16(a, b); - #else - #if defined(simde_math_fmaxf) - return simde_float16_from_float32(simde_math_fmaxf(simde_float16_to_float32(a), simde_float16_to_float32(b))); - #else - simde_float32_t a_ = simde_float16_to_float32(a); - simde_float32_t b_ = simde_float16_to_float32(b); - simde_float32_t r_; - if (a_ > b_) { - r_ = a_; - } else if (a_ < b_) { - r_ = b_; - } else if (a_ == a_) { - r_ = a_; - } else { - r_ = b_; - } - return simde_float16_from_float32(r_); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vmaxnmh_f16 - #define vmaxnmh_f16(a, b) simde_vmaxnmh_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vmaxnm_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6) && defined(SIMDE_ARM_NEON_FP16) - return vmaxnm_f16(a, b); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vmaxnmh_f16(a_.values[i], b_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vmaxnm_f16 - #define vmaxnm_f16(a, b) simde_vmaxnm_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vmaxnmq_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6) && defined(SIMDE_ARM_NEON_FP16) - return vmaxnmq_f16(a, b); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vmaxnmh_f16(a_.values[i], b_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vmaxnmq_f16 - #define vmaxnmq_f16(a, b) simde_vmaxnmq_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vmaxnm_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6) - return vmaxnm_f32(a, b); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if defined(simde_math_fmaxf) - r_.values[i] = simde_math_fmaxf(a_.values[i], b_.values[i]); - #else - if (a_.values[i] > b_.values[i]) { - r_.values[i] = a_.values[i]; - } else if (a_.values[i] < b_.values[i]) { - r_.values[i] = b_.values[i]; - } else if (a_.values[i] == a_.values[i]) { - 
r_.values[i] = a_.values[i]; - } else { - r_.values[i] = b_.values[i]; - } - #endif - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmaxnm_f32 - #define vmaxnm_f32(a, b) simde_vmaxnm_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vmaxnm_f64(simde_float64x1_t a, simde_float64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmaxnm_f64(a, b); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a), - b_ = simde_float64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if defined(simde_math_fmax) - r_.values[i] = simde_math_fmax(a_.values[i], b_.values[i]); - #else - if (a_.values[i] > b_.values[i]) { - r_.values[i] = a_.values[i]; - } else if (a_.values[i] < b_.values[i]) { - r_.values[i] = b_.values[i]; - } else if (a_.values[i] == a_.values[i]) { - r_.values[i] = a_.values[i]; - } else { - r_.values[i] = b_.values[i]; - } - #endif - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxnm_f64 - #define vmaxnm_f64(a, b) simde_vmaxnm_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vmaxnmq_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6) - return vmaxnmq_f32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_max(a, b); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - #if defined(SIMDE_X86_SSE_NATIVE) - #if !defined(SIMDE_FAST_NANS) - __m128 r = _mm_max_ps(a_.m128, b_.m128); - __m128 bnan = _mm_cmpunord_ps(b_.m128, b_.m128); - r = _mm_andnot_ps(bnan, r); - r = _mm_or_ps(r, _mm_and_ps(a_.m128, bnan)); - r_.m128 = r; - #else - r_.m128 = _mm_max_ps(a_.m128, b_.m128); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.v128 = wasm_f32x4_max(a_.v128, b_.v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if defined(simde_math_fmaxf) - r_.values[i] = simde_math_fmaxf(a_.values[i], b_.values[i]); - #else - if (a_.values[i] > b_.values[i]) { - r_.values[i] = a_.values[i]; - } else if (a_.values[i] < b_.values[i]) { - r_.values[i] = b_.values[i]; - } else if (a_.values[i] == a_.values[i]) { - r_.values[i] = a_.values[i]; - } else { - r_.values[i] = b_.values[i]; - } - #endif - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmaxnmq_f32 - #define vmaxnmq_f32(a, b) simde_vmaxnmq_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vmaxnmq_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmaxnmq_f64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_max(a, b); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - #if !defined(SIMDE_FAST_NANS) - __m128d r = _mm_max_pd(a_.m128d, b_.m128d); - __m128d bnan = _mm_cmpunord_pd(b_.m128d, b_.m128d); - r = _mm_andnot_pd(bnan, r); - r = _mm_or_pd(r, _mm_and_pd(a_.m128d, bnan)); - r_.m128d = r; - #else - r_.m128d = _mm_max_pd(a_.m128d, b_.m128d); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.v128 = 
wasm_f64x2_max(a_.v128, b_.v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if defined(simde_math_fmax) - r_.values[i] = simde_math_fmax(a_.values[i], b_.values[i]); - #else - if (a_.values[i] > b_.values[i]) { - r_.values[i] = a_.values[i]; - } else if (a_.values[i] < b_.values[i]) { - r_.values[i] = b_.values[i]; - } else if (a_.values[i] == a_.values[i]) { - r_.values[i] = a_.values[i]; - } else { - r_.values[i] = b_.values[i]; - } - #endif - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxnmq_f64 - #define vmaxnmq_f64(a, b) simde_vmaxnmq_f64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MAXNM_H) */ -/* :: End simde/arm/neon/maxnm.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/maxnmv.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_MAXNMV_H) -#define SIMDE_ARM_NEON_MAXNMV_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vmaxnmv_f32(simde_float32x2_t a) { - simde_float32_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxnmv_f32(a); - #else - simde_float32x2_private a_ = simde_float32x2_to_private(a); - - r = -SIMDE_MATH_INFINITYF; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxnmv_f32 - #define vmaxnmv_f32(v) simde_vmaxnmv_f32(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vmaxnmvq_f32(simde_float32x4_t a) { - simde_float32_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxnmvq_f32(a); - #else - simde_float32x4_private a_ = simde_float32x4_to_private(a); - - r = -SIMDE_MATH_INFINITYF; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? 
a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxnmvq_f32 - #define vmaxnmvq_f32(v) simde_vmaxnmvq_f32(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vmaxnmvq_f64(simde_float64x2_t a) { - simde_float64_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxnmvq_f64(a); - #else - simde_float64x2_private a_ = simde_float64x2_to_private(a); - - r = -SIMDE_MATH_INFINITY; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxnmvq_f64 - #define vmaxnmvq_f64(v) simde_vmaxnmvq_f64(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vmaxnmv_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vmaxnmv_f16(a); - #else - simde_float32_t r_ = simde_float16_to_float32(SIMDE_NINFINITYHF); - simde_float16x4_private a_ = simde_float16x4_to_private(a); - - #if defined(SIMDE_FAST_NANS) - SIMDE_VECTORIZE_REDUCTION(max:r_) - #else - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - simde_float32_t tmp_a = simde_float16_to_float32(a_.values[i]); - #if defined(SIMDE_FAST_NANS) - r_ = tmp_a > r_ ? tmp_a : r_; - #else - r_ = (tmp_a > r_) ? tmp_a : ((tmp_a <= r_) ? r_ : ((tmp_a == tmp_a) ? r_ : tmp_a)); - #endif - } - return simde_float16_from_float32(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxnmv_f16 - #define vmaxnmv_f16(v) simde_vmaxnmv_f16(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vmaxnmvq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vmaxnmvq_f16(a); - #else - simde_float32_t r_ = simde_float16_to_float32(SIMDE_NINFINITYHF); - simde_float16x8_private a_ = simde_float16x8_to_private(a); - - #if defined(SIMDE_FAST_NANS) - SIMDE_VECTORIZE_REDUCTION(max:r_) - #else - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - simde_float32_t tmp_a = simde_float16_to_float32(a_.values[i]); - #if defined(SIMDE_FAST_NANS) - r_ = tmp_a > r_ ? tmp_a : r_; - #else - r_ = (tmp_a > r_) ? tmp_a : ((tmp_a <= r_) ? r_ : ((tmp_a == tmp_a) ? 
r_ : tmp_a)); - #endif - } - return simde_float16_from_float32(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxnmvq_f16 - #define vmaxnmvq_f16(v) simde_vmaxnmvq_f16(v) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MAXNMV_H) */ -/* :: End simde/arm/neon/maxnmv.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/maxv.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_MAXV_H) -#define SIMDE_ARM_NEON_MAXV_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vmaxv_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vmaxv_f16(a); - #else - simde_float32_t r; - simde_float16x4_private a_ = simde_float16x4_to_private(a); - - r = simde_float16_to_float32(SIMDE_NINFINITYHF); - #if defined(SIMDE_FAST_NANS) - SIMDE_VECTORIZE_REDUCTION(max:r) - #else - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - simde_float32_t a32 = simde_float16_to_float32(a_.values[i]); - #if defined(SIMDE_FAST_NANS) - r = a32 > r ? a32 : r; - #else - r = a32 > r ? a32 : (a32 <= r ? r : ((a32 == a32) ? r : a32)); - #endif - } - - return simde_float16_from_float32(r); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxv_f16 - #define vmaxv_f16(v) simde_vmaxv_f16(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vmaxv_f32(simde_float32x2_t a) { - simde_float32_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxv_f32(a); - #else - simde_float32x2_private a_ = simde_float32x2_to_private(a); - - r = -SIMDE_MATH_INFINITYF; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? 
a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxv_f32 - #define vmaxv_f32(v) simde_vmaxv_f32(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_vmaxv_s8(simde_int8x8_t a) { - int8_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxv_s8(a); - #else - simde_int8x8_private a_ = simde_int8x8_to_private(a); - - r = INT8_MIN; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxv_s8 - #define vmaxv_s8(v) simde_vmaxv_s8(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vmaxv_s16(simde_int16x4_t a) { - int16_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxv_s16(a); - #else - simde_int16x4_private a_ = simde_int16x4_to_private(a); - - r = INT16_MIN; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxv_s16 - #define vmaxv_s16(v) simde_vmaxv_s16(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vmaxv_s32(simde_int32x2_t a) { - int32_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxv_s32(a); - #else - simde_int32x2_private a_ = simde_int32x2_to_private(a); - - r = INT32_MIN; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxv_s32 - #define vmaxv_s32(v) simde_vmaxv_s32(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint8_t -simde_vmaxv_u8(simde_uint8x8_t a) { - uint8_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxv_u8(a); - #else - simde_uint8x8_private a_ = simde_uint8x8_to_private(a); - - r = 0; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxv_u8 - #define vmaxv_u8(v) simde_vmaxv_u8(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vmaxv_u16(simde_uint16x4_t a) { - uint16_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxv_u16(a); - #else - simde_uint16x4_private a_ = simde_uint16x4_to_private(a); - - r = 0; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxv_u16 - #define vmaxv_u16(v) simde_vmaxv_u16(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vmaxv_u32(simde_uint32x2_t a) { - uint32_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxv_u32(a); - #else - simde_uint32x2_private a_ = simde_uint32x2_to_private(a); - - r = 0; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? 
a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxv_u32 - #define vmaxv_u32(v) simde_vmaxv_u32(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vmaxvq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vmaxvq_f16(a); - #else - simde_float32_t r; - simde_float16x8_private a_ = simde_float16x8_to_private(a); - - r = simde_float16_to_float32(SIMDE_NINFINITYHF); - #if defined(SIMDE_FAST_NANS) - SIMDE_VECTORIZE_REDUCTION(max:r) - #else - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - simde_float32_t a32 = simde_float16_to_float32(a_.values[i]); - #if defined(SIMDE_FAST_NANS) - r = a32 > r ? a32 : r; - #else - r = a32 > r ? a32 : (a32 <= r ? r : ((a32 == a32) ? r : a32)); - #endif - } - - return simde_float16_from_float32(r); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxvq_f16 - #define vmaxvq_f16(v) simde_vmaxvq_f16(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vmaxvq_f32(simde_float32x4_t a) { - simde_float32_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxvq_f32(a); - #else - simde_float32x4_private a_ = simde_float32x4_to_private(a); - - r = -SIMDE_MATH_INFINITYF; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxvq_f32 - #define vmaxvq_f32(v) simde_vmaxvq_f32(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vmaxvq_f64(simde_float64x2_t a) { - simde_float64_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxvq_f64(a); - #else - simde_float64x2_private a_ = simde_float64x2_to_private(a); - - r = -SIMDE_MATH_INFINITY; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxvq_f64 - #define vmaxvq_f64(v) simde_vmaxvq_f64(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_vmaxvq_s8(simde_int8x16_t a) { - int8_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxvq_s8(a); - #else - simde_int8x16_private a_ = simde_int8x16_to_private(a); - - r = INT8_MIN; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxvq_s8 - #define vmaxvq_s8(v) simde_vmaxvq_s8(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vmaxvq_s16(simde_int16x8_t a) { - int16_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxvq_s16(a); - #else - simde_int16x8_private a_ = simde_int16x8_to_private(a); - - r = INT16_MIN; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? 
a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxvq_s16 - #define vmaxvq_s16(v) simde_vmaxvq_s16(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vmaxvq_s32(simde_int32x4_t a) { - int32_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxvq_s32(a); - #else - simde_int32x4_private a_ = simde_int32x4_to_private(a); - - r = INT32_MIN; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxvq_s32 - #define vmaxvq_s32(v) simde_vmaxvq_s32(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint8_t -simde_vmaxvq_u8(simde_uint8x16_t a) { - uint8_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxvq_u8(a); - #else - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - - r = 0; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxvq_u8 - #define vmaxvq_u8(v) simde_vmaxvq_u8(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vmaxvq_u16(simde_uint16x8_t a) { - uint16_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxvq_u16(a); - #else - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - - r = 0; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxvq_u16 - #define vmaxvq_u16(v) simde_vmaxvq_u16(v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vmaxvq_u32(simde_uint32x4_t a) { - uint32_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vmaxvq_u32(a); - #else - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - - r = 0; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { - r = a_.values[i] > r ? a_.values[i] : r; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmaxvq_u32 - #define vmaxvq_u32(v) simde_vmaxvq_u32(v) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MAXV_H) */ -/* :: End simde/arm/neon/maxv.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/min.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_MIN_H) -#define SIMDE_ARM_NEON_MIN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vminh_f16(simde_float16_t a, simde_float16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vminh_f16(a, b); - #else - simde_float32_t r_; - simde_float32_t a_ = simde_float16_to_float32(a); - simde_float32_t b_ = simde_float16_to_float32(b); - #if !defined(SIMDE_FAST_NANS) - r_ = (a_ <= b_) ? a_ : ((a_ > b_) ? b_ : SIMDE_MATH_NANF); - #else - r_ = (a_ < b_) ? a_ : b_; - #endif - return simde_float16_from_float32(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vminh_f16 - #define vminh_f16(a, b) simde_vminh_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vmin_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vmin_f16(a, b); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vminh_f16(a_.values[i], b_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vmin_f16 - #define vmin_f16(a, b) simde_vmin_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vmin_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmin_f32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) - simde_float32x2_t r = simde_vbsl_f32(simde_vcgt_f32(b, a), a, b); - - #if !defined(SIMDE_FAST_NANS) - r = simde_vbsl_f32(simde_vceq_f32(a, a), simde_vbsl_f32(simde_vceq_f32(b, b), r, b), a); - #endif - - return r; - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if !defined(SIMDE_FAST_NANS) - if (simde_math_isnanf(a_.values[i])) { - r_.values[i] = a_.values[i]; - } else if (simde_math_isnanf(b_.values[i])) { - r_.values[i] = b_.values[i]; - } else { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - #else - r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; - #endif - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmin_f32 - #define vmin_f32(a, b) simde_vmin_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vmin_f64(simde_float64x1_t a, simde_float64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmin_f64(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) - simde_float64x1_t r = simde_vbsl_f64(simde_vcgt_f64(b, a), a, b); - - #if !defined(SIMDE_FAST_NANS) - r = simde_vbsl_f64(simde_vceq_f64(a, a), simde_vbsl_f64(simde_vceq_f64(b, b), r, b), a); - #endif - - return r; - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a), - b_ = simde_float64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if !defined(SIMDE_FAST_NANS) - if (simde_math_isnan(a_.values[i])) { - r_.values[i] = a_.values[i]; - } else if (simde_math_isnan(b_.values[i])) { - r_.values[i] = b_.values[i]; - } else { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - #else - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - #endif - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmin_f64 - #define vmin_f64(a, b) simde_vmin_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vmin_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmin_s8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_s8(simde_vcgt_s8(b, a), a, b); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmin_s8 - #define vmin_s8(a, b) simde_vmin_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vmin_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmin_s16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_s16(simde_vcgt_s16(b, a), a, b); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmin_s16 - #define vmin_s16(a, b) simde_vmin_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vmin_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmin_s32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_s32(simde_vcgt_s32(b, a), a, b); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; - } - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmin_s32 - #define vmin_s32(a, b) simde_vmin_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_x_vmin_s64(simde_int64x1_t a, simde_int64x1_t b) { - #if SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_s64(simde_vcgt_s64(b, a), a, b); - #else - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - - return simde_int64x1_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vmin_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmin_u8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_u8(simde_vcgt_u8(b, a), a, b); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmin_u8 - #define vmin_u8(a, b) simde_vmin_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vmin_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmin_u16(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && !defined(SIMDE_X86_SSE2_NATIVE) - return simde_vbsl_u16(simde_vcgt_u16(b, a), a, b); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - r_.m64 = _mm_sub_pi16(a_.m64, _mm_subs_pu16(a_.m64, b_.m64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmin_u16 - #define vmin_u16(a, b) simde_vmin_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vmin_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmin_u32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_u32(simde_vcgt_u32(b, a), a, b); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; - } - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmin_u32 - #define vmin_u32(a, b) simde_vmin_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_x_vmin_u64(simde_uint64x1_t a, simde_uint64x1_t b) { - #if SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vbsl_u64(simde_vcgt_u64(b, a), a, b); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - - return simde_uint64x1_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vminq_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vminq_f16(a, b); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vminh_f16(a_.values[i], b_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vminq_f16 - #define vminq_f16(a, b) simde_vminq_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vminq_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vminq_f32(a, b); - #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && defined(SIMDE_FAST_NANS) - return vec_min(a, b); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_FAST_NANS) - r_.m128 = _mm_min_ps(a_.m128, b_.m128); - #elif defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128 = _mm_blendv_ps(_mm_set1_ps(SIMDE_MATH_NANF), _mm_min_ps(a_.m128, b_.m128), _mm_cmpord_ps(a_.m128, b_.m128)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_f32x4_min(a_.v128, b_.v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if !defined(SIMDE_FAST_NANS) - if (simde_math_isnanf(a_.values[i])) { - r_.values[i] = a_.values[i]; - } else if (simde_math_isnanf(b_.values[i])) { - r_.values[i] = b_.values[i]; - } else { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - #else - r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; - #endif - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vminq_f32 - #define vminq_f32(a, b) simde_vminq_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vminq_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vminq_f64(a, b); - #elif (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && defined(SIMDE_FAST_NANS) - return vec_min(a, b); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_FAST_NANS) - r_.m128d = _mm_min_pd(a_.m128d, b_.m128d); - #elif defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128d = _mm_blendv_pd(_mm_set1_pd(SIMDE_MATH_NAN), _mm_min_pd(a_.m128d, b_.m128d), _mm_cmpord_pd(a_.m128d, b_.m128d)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_f64x2_min(a_.v128, b_.v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if !defined(SIMDE_FAST_NANS) - if (simde_math_isnan(a_.values[i])) { - r_.values[i] = a_.values[i]; - } else if (simde_math_isnan(b_.values[i])) { - r_.values[i] = b_.values[i]; - } else { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - #else - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - #endif - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vminq_f64 - #define vminq_f64(a, b) simde_vminq_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vminq_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vminq_s8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_min(a, b); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = _mm_min_epi8(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_min(a_.v128, b_.v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vminq_s8 - #define vminq_s8(a, b) simde_vminq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vminq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vminq_s16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_min(a, b); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_min_epi16(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_min(a_.v128, b_.v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vminq_s16 - #define vminq_s16(a, b) simde_vminq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vminq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vminq_s32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_min(a, b); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = _mm_min_epi32(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_min(a_.v128, b_.v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vminq_s32 - #define vminq_s32(a, b) simde_vminq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_x_vminq_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_min(a, b); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - - #if defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_min_epi64(a_.m128i, b_.m128i); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vminq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vminq_u8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_min(a, b); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_min_epu8(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u8x16_min(a_.v128, b_.v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vminq_u8 - #define vminq_u8(a, b) simde_vminq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vminq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vminq_u16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_min(a, b); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = _mm_min_epu16(a_.m128i, b_.m128i); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - r_.m128i = _mm_sub_epi16(a_.m128i, _mm_subs_epu16(a_.m128i, b_.m128i)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u16x8_min(a_.v128, b_.v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vminq_u16 - #define vminq_u16(a, b) simde_vminq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vminq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vminq_u32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_min(a, b); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = _mm_min_epu32(a_.m128i, b_.m128i); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i i32_min = _mm_set1_epi32(INT32_MIN); - const __m128i difference = _mm_sub_epi32(a_.m128i, b_.m128i); - __m128i m = - _mm_cmpeq_epi32( - /* _mm_subs_epu32(a_.sse_m128i, b_.sse_m128i) */ - _mm_and_si128( - difference, - _mm_xor_si128( - _mm_cmpgt_epi32( - _mm_xor_si128(difference, i32_min), - _mm_xor_si128(a_.m128i, i32_min) - ), - _mm_set1_epi32(~INT32_C(0)) - ) - ), - _mm_setzero_si128() - ); - r_.m128i = - _mm_or_si128( - _mm_and_si128(m, a_.m128i), - _mm_andnot_si128(m, b_.m128i) - ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u32x4_min(a_.v128, b_.v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vminq_u32 - #define vminq_u32(a, b) simde_vminq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_x_vminq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - return vec_min(a, b); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] < b_.values[i]) ? 
- [vendored SIMDE tree deleted: this hunk removes the remainder of simde/arm/neon/min.h and the whole of simde/arm/neon/minnm.h, minnmv.h, minv.h, mla.h, mla_lane.h, mla_n.h (with its nested mul_n.h), mlal.h, and the opening of mlal_high.h. Each deleted header consists of the repeated MIT license block, "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY" markers, and portable polyfills for the corresponding NEON intrinsics (vminnm*, vminnmv*, vminv*, vmla*, vmla_lane*, vmla_n*, vmul_n*, vmlal*): the native ARM intrinsic when the target provides it, otherwise an SSE/AltiVec/WASM mapping or a plain per-lane fallback loop.]
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - */ - -#if !defined(SIMDE_ARM_NEON_MLAL_HIGH_H) -#define SIMDE_ARM_NEON_MLAL_HIGH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vmlal_high_s8(simde_int16x8_t a, simde_int8x16_t b, simde_int8x16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmlal_high_s8(a, b, c); - #else - return simde_vmlaq_s16(a, simde_vmovl_high_s8(b), simde_vmovl_high_s8(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_s8 - #define vmlal_high_s8(a, b, c) simde_vmlal_high_s8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vmlal_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmlal_high_s16(a, b, c); - #else - return simde_vmlaq_s32(a, simde_vmovl_high_s16(b), simde_vmovl_high_s16(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_s16 - #define vmlal_high_s16(a, b, c) simde_vmlal_high_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vmlal_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmlal_high_s32(a, b, c); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(simde_vmovl_high_s32(b)), - c_ = simde_int64x2_to_private(simde_vmovl_high_s32(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = (b_.values * c_.values) + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_s32 - #define vmlal_high_s32(a, b, c) simde_vmlal_high_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vmlal_high_u8(simde_uint16x8_t a, simde_uint8x16_t b, simde_uint8x16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmlal_high_u8(a, b, c); - #else - return simde_vmlaq_u16(a, simde_vmovl_high_u8(b), simde_vmovl_high_u8(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_u8 - #define vmlal_high_u8(a, b, c) simde_vmlal_high_u8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vmlal_high_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmlal_high_u16(a, b, c); - #else - return simde_vmlaq_u32(a, simde_vmovl_high_u16(b), simde_vmovl_high_u16(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_u16 - #define vmlal_high_u16(a, b, c) simde_vmlal_high_u16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vmlal_high_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmlal_high_u32(a, b, c); - #else - simde_uint64x2_private - r_, - a_ = 
simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(simde_vmovl_high_u32(b)), - c_ = simde_uint64x2_to_private(simde_vmovl_high_u32(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = (b_.values * c_.values) + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_u32 - #define vmlal_high_u32(a, b, c) simde_vmlal_high_u32((a), (b), (c)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MLAL_HIGH_H) */ -/* :: End simde/arm/neon/mlal_high.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mlal_high_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_MLAL_HIGH_LANE_H) -#define SIMDE_ARM_NEON_MLAL_HIGH_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vmlal_high_lane_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vmlal_high_s16(a, b, simde_vdupq_n_s16(simde_int16x4_to_private(v).values[lane])); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlal_high_lane_s16(a, b, v, lane) vmlal_high_lane_s16(a, b, v, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_lane_s16 - #define vmlal_high_lane_s16(a, b, v, lane) simde_vmlal_high_lane_s16((a), (b), (v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vmlal_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_vmlal_high_s16(a, b, simde_vdupq_n_s16(simde_int16x8_to_private(v).values[lane])); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlal_high_laneq_s16(a, b, v, lane) vmlal_high_laneq_s16(a, b, v, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_laneq_s16 - #define vmlal_high_laneq_s16(a, b, v, lane) simde_vmlal_high_laneq_s16((a), (b), (v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vmlal_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - return simde_vmlal_high_s32(a, b, simde_vdupq_n_s32(simde_int32x2_to_private(v).values[lane])); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlal_high_lane_s32(a, b, v, lane) vmlal_high_lane_s32(a, b, v, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_lane_s32 - #define vmlal_high_lane_s32(a, b, v, lane) simde_vmlal_high_lane_s32((a), (b), (v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vmlal_high_laneq_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vmlal_high_s32(a, b, simde_vdupq_n_s32(simde_int32x4_to_private(v).values[lane])); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlal_high_laneq_s32(a, b, v, lane) vmlal_high_laneq_s32(a, b, v, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_laneq_s32 - #define vmlal_high_laneq_s32(a, b, v, lane) simde_vmlal_high_laneq_s32((a), (b), (v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vmlal_high_lane_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vmlal_high_u16(a, b, simde_vdupq_n_u16(simde_uint16x4_to_private(v).values[lane])); -} -#if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlal_high_lane_u16(a, b, v, lane) vmlal_high_lane_u16(a, b, v, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_lane_u16 - #define vmlal_high_lane_u16(a, b, v, lane) simde_vmlal_high_lane_u16((a), (b), (v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vmlal_high_laneq_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_vmlal_high_u16(a, b, simde_vdupq_n_u16(simde_uint16x8_to_private(v).values[lane])); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlal_high_laneq_u16(a, b, v, lane) vmlal_high_laneq_u16(a, b, v, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_laneq_u16 - #define vmlal_high_laneq_u16(a, b, v, lane) simde_vmlal_high_laneq_u16((a), (b), (v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vmlal_high_lane_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - return simde_vmlal_high_u32(a, b, simde_vdupq_n_u32(simde_uint32x2_to_private(v).values[lane])); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlal_high_lane_u32(a, b, v, lane) vmlal_high_lane_u32(a, b, v, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_lane_u32 - #define vmlal_high_lane_u32(a, b, v, lane) simde_vmlal_high_lane_u32((a), (b), (v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vmlal_high_laneq_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vmlal_high_u32(a, b, simde_vdupq_n_u32(simde_uint32x4_to_private(v).values[lane])); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlal_high_laneq_u32(a, b, v, lane) vmlal_high_laneq_u32(a, b, v, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_laneq_u32 - #define vmlal_high_laneq_u32(a, b, v, lane) simde_vmlal_high_laneq_u32((a), (b), (v), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MLAL_HIGH_LANE_H) */ -/* :: End simde/arm/neon/mlal_high_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mlal_high_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Décio Luiz Gazzoni Filho - */ - -#if !defined(SIMDE_ARM_NEON_MLAL_HIGH_N_H) -#define SIMDE_ARM_NEON_MLAL_HIGH_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vmlal_high_n_s16(simde_int32x4_t a, simde_int16x8_t b, int16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmlal_high_n_s16(a, b, c); - #else - return simde_vmlaq_s32(a, simde_vmovl_high_s16(b), simde_vdupq_n_s32(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_n_s16 - #define vmlal_high_n_s16(a, b, c) simde_vmlal_high_n_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vmlal_high_n_s32(simde_int64x2_t a, simde_int32x4_t b, int32_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmlal_high_n_s32(a, b, c); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(simde_vmovl_high_s32(b)), - c_ = simde_int64x2_to_private(simde_vdupq_n_s64(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = (b_.values * c_.values) + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_n_s32 - #define vmlal_high_n_s32(a, b, c) simde_vmlal_high_n_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vmlal_high_n_u16(simde_uint32x4_t a, simde_uint16x8_t b, uint16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmlal_high_n_u16(a, b, c); - #else - return simde_vmlaq_u32(a, simde_vmovl_high_u16(b), simde_vdupq_n_u32(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_n_u16 - #define vmlal_high_n_u16(a, b, c) simde_vmlal_high_n_u16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vmlal_high_n_u32(simde_uint64x2_t a, simde_uint32x4_t b, uint32_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmlal_high_n_u32(a, b, c); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(simde_vmovl_high_u32(b)), - c_ = simde_uint64x2_to_private(simde_vdupq_n_u64(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = (b_.values * c_.values) + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_high_n_u32 - #define vmlal_high_n_u32(a, b, c) 
simde_vmlal_high_n_u32((a), (b), (c)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MLAL_HIGH_N_H) */ -/* :: End simde/arm/neon/mlal_high_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mlal_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_NEON_MLAL_LANE_H) -#define SIMDE_ARM_NEON_MLAL_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmlal_lane_s16(a, b, v, lane) vmlal_lane_s16((a), (b), (v), (lane)) -#else - #define simde_vmlal_lane_s16(a, b, v, lane) simde_vmlal_s16((a), (b), simde_vdup_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlal_lane_s16 - #define vmlal_lane_s16(a, b, c, lane) simde_vmlal_lane_s16((a), (b), (c), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmlal_lane_s32(a, b, v, lane) vmlal_lane_s32((a), (b), (v), (lane)) -#else - #define simde_vmlal_lane_s32(a, b, v, lane) simde_vmlal_s32((a), (b), simde_vdup_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlal_lane_s32 - #define vmlal_lane_s32(a, b, c, lane) simde_vmlal_lane_s32((a), (b), (c), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmlal_lane_u16(a, b, v, lane) vmlal_lane_u16((a), (b), (v), (lane)) -#else - #define simde_vmlal_lane_u16(a, b, v, lane) simde_vmlal_u16((a), (b), simde_vdup_lane_u16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlal_lane_u16 - #define vmlal_lane_u16(a, b, c, lane) simde_vmlal_lane_u16((a), (b), (c), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmlal_lane_u32(a, b, v, lane) vmlal_lane_u32((a), (b), (v), (lane)) -#else - #define simde_vmlal_lane_u32(a, b, v, lane) simde_vmlal_u32((a), (b), simde_vdup_lane_u32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlal_lane_u32 - #define vmlal_lane_u32(a, b, c, lane) simde_vmlal_lane_u32((a), (b), (c), 
(lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlal_laneq_s16(a, b, v, lane) vmlal_laneq_s16((a), (b), (v), (lane)) -#else - #define simde_vmlal_laneq_s16(a, b, v, lane) simde_vmlal_s16((a), (b), simde_vdup_laneq_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_laneq_s16 - #define vmlal_laneq_s16(a, b, c, lane) simde_vmlal_laneq_s16((a), (b), (c), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlal_laneq_s32(a, b, v, lane) vmlal_laneq_s32((a), (b), (v), (lane)) -#else - #define simde_vmlal_laneq_s32(a, b, v, lane) simde_vmlal_s32((a), (b), simde_vdup_laneq_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_laneq_s32 - #define vmlal_laneq_s32(a, b, c, lane) simde_vmlal_laneq_s32((a), (b), (c), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlal_laneq_u16(a, b, v, lane) vmlal_laneq_u16((a), (b), (v), (lane)) -#else - #define simde_vmlal_laneq_u16(a, b, v, lane) simde_vmlal_u16((a), (b), simde_vdup_laneq_u16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_laneq_u16 - #define vmlal_laneq_u16(a, b, c, lane) simde_vmlal_laneq_u16((a), (b), (c), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlal_laneq_u32(a, b, v, lane) vmlal_laneq_u32((a), (b), (v), (lane)) -#else - #define simde_vmlal_laneq_u32(a, b, v, lane) simde_vmlal_u32((a), (b), simde_vdup_laneq_u32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlal_laneq_u32 - #define vmlal_laneq_u32(a, b, c, lane) simde_vmlal_laneq_u32((a), (b), (c), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MLAL_LANE_H) */ -/* :: End simde/arm/neon/mlal_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mlal_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_NEON_MLAL_N_H) -#define SIMDE_ARM_NEON_MLAL_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vmlal_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmlal_n_s16(a, b, c); - #else - return simde_vmlaq_s32(a, simde_vmovl_s16(b), simde_vdupq_n_s32(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlal_n_s16 - #define vmlal_n_s16(a, b, c) simde_vmlal_n_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vmlal_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmlal_n_s32(a, b, c); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(simde_vmovl_s32(b)), - c_ = simde_int64x2_to_private(simde_vdupq_n_s64(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = (b_.values * c_.values) + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlal_n_s32 - #define vmlal_n_s32(a, b, c) simde_vmlal_n_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vmlal_n_u16(simde_uint32x4_t a, simde_uint16x4_t b, uint16_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmlal_n_u16(a, b, c); - #else - return simde_vmlaq_u32(a, simde_vmovl_u16(b), simde_vdupq_n_u32(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlal_n_u16 - #define vmlal_n_u16(a, b, c) simde_vmlal_n_u16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vmlal_n_u32(simde_uint64x2_t a, simde_uint32x2_t b, uint32_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmlal_n_u32(a, b, c); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(simde_vmovl_u32(b)), - c_ = simde_uint64x2_to_private(simde_vdupq_n_u64(c)); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = (b_.values * c_.values) + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlal_n_u32 - #define vmlal_n_u32(a, b, c) simde_vmlal_n_u32((a), (b), (c)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MLAL_N_H) */ -/* :: End simde/arm/neon/mlal_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mls.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person 
- * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_NEON_MLS_H) -#define SIMDE_ARM_NEON_MLS_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vmls_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmls_f32(a, b, c); - #else - return simde_vsub_f32(a, simde_vmul_f32(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmls_f32 - #define vmls_f32(a, b, c) simde_vmls_f32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vmls_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmls_f64(a, b, c); - #else - return simde_vsub_f64(a, simde_vmul_f64(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmls_f64 - #define vmls_f64(a, b, c) simde_vmls_f64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vmls_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmls_s8(a, b, c); - #else - return simde_vsub_s8(a, simde_vmul_s8(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmls_s8 - #define vmls_s8(a, b, c) simde_vmls_s8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vmls_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmls_s16(a, b, c); - #else - return simde_vsub_s16(a, simde_vmul_s16(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmls_s16 - #define vmls_s16(a, b, c) simde_vmls_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vmls_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmls_s32(a, b, c); - #else - return simde_vsub_s32(a, simde_vmul_s32(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmls_s32 - #define vmls_s32(a, b, c) simde_vmls_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vmls_u8(simde_uint8x8_t a, simde_uint8x8_t b, 
simde_uint8x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmls_u8(a, b, c); - #else - return simde_vsub_u8(a, simde_vmul_u8(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmls_u8 - #define vmls_u8(a, b, c) simde_vmls_u8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vmls_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmls_u16(a, b, c); - #else - return simde_vsub_u16(a, simde_vmul_u16(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmls_u16 - #define vmls_u16(a, b, c) simde_vmls_u16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vmls_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmls_u32(a, b, c); - #else - return simde_vsub_u32(a, simde_vmul_u32(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmls_u32 - #define vmls_u32(a, b, c) simde_vmls_u32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vmlsq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmlsq_f32(a, b, c); - #elif defined(SIMDE_X86_FMA_NATIVE) - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b), - c_ = simde_float32x4_to_private(c); - r_.m128 = _mm_fnmadd_ps(b_.m128, c_.m128, a_.m128); - return simde_float32x4_from_private(r_); - #else - return simde_vsubq_f32(a, simde_vmulq_f32(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlsq_f32 - #define vmlsq_f32(a, b, c) simde_vmlsq_f32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vmlsq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmlsq_f64(a, b, c); - #elif defined(SIMDE_X86_FMA_NATIVE) - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b), - c_ = simde_float64x2_to_private(c); - r_.m128d = _mm_fnmadd_pd(b_.m128d, c_.m128d, a_.m128d); - return simde_float64x2_from_private(r_); - #else - return simde_vsubq_f64(a, simde_vmulq_f64(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlsq_f64 - #define vmlsq_f64(a, b, c) simde_vmlsq_f64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vmlsq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmlsq_s8(a, b, c); - #else - return simde_vsubq_s8(a, simde_vmulq_s8(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlsq_s8 - #define vmlsq_s8(a, b, c) simde_vmlsq_s8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vmlsq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmlsq_s16(a, b, c); - #else - return simde_vsubq_s16(a, simde_vmulq_s16(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlsq_s16 - #define vmlsq_s16(a, b, c) simde_vmlsq_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vmlsq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmlsq_s32(a, b, c); - #else - return 
simde_vsubq_s32(a, simde_vmulq_s32(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlsq_s32 - #define vmlsq_s32(a, b, c) simde_vmlsq_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vmlsq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmlsq_u8(a, b, c); - #else - return simde_vsubq_u8(a, simde_vmulq_u8(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlsq_u8 - #define vmlsq_u8(a, b, c) simde_vmlsq_u8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vmlsq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmlsq_u16(a, b, c); - #else - return simde_vsubq_u16(a, simde_vmulq_u16(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlsq_u16 - #define vmlsq_u16(a, b, c) simde_vmlsq_u16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vmlsq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vmlsq_u32(a, b, c); - #else - return simde_vsubq_u32(a, simde_vmulq_u32(b, c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlsq_u32 - #define vmlsq_u32(a, b, c) simde_vmlsq_u32((a), (b), (c)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MLS_H) */ -/* :: End simde/arm/neon/mls.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mls_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_MLS_LANE_H) -#define SIMDE_ARM_NEON_MLS_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmls_lane_f32(a, b, v, lane) vmls_lane_f32((a), (b), (v), (lane)) -#else - #define simde_vmls_lane_f32(a, b, v, lane) simde_vmls_f32((a), (b), simde_vdup_lane_f32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmls_lane_f32 - #define vmls_lane_f32(a, b, v, lane) simde_vmls_lane_f32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmls_laneq_f32(a, b, v, lane) vmls_laneq_f32((a), (b), (v), (lane)) -#else - #define simde_vmls_laneq_f32(a, b, v, lane) simde_vmls_f32((a), (b), simde_vdup_laneq_f32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmls_laneq_f32 - #define vmls_laneq_f32(a, b, v, lane) simde_vmls_laneq_f32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlsq_laneq_f32(a, b, v, lane) vmlsq_laneq_f32((a), (b), (v), (lane)) -#else - #define simde_vmlsq_laneq_f32(a, b, v, lane) simde_vmlsq_f32((a), (b), simde_vdupq_laneq_f32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlsq_laneq_f32 - #define vmlsq_laneq_f32(a, b, v, lane) simde_vmlsq_laneq_f32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmls_lane_s16(a, b, v, lane) vmls_lane_s16((a), (b), (v), (lane)) -#else - #define simde_vmls_lane_s16(a, b, v, lane) simde_vmls_s16((a), (b), simde_vdup_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmls_lane_s16 - #define vmls_lane_s16(a, b, v, lane) simde_vmls_lane_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmls_laneq_s16(a, b, v, lane) vmls_laneq_s16((a), (b), (v), (lane)) -#else - #define simde_vmls_laneq_s16(a, b, v, lane) simde_vmls_s16((a), (b), simde_vdup_laneq_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmls_laneq_s16 - #define vmls_laneq_s16(a, b, v, lane) simde_vmls_laneq_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlsq_laneq_s16(a, b, v, lane) vmlsq_laneq_s16((a), (b), (v), (lane)) -#else - #define simde_vmlsq_laneq_s16(a, b, v, lane) simde_vmlsq_s16((a), (b), simde_vdupq_laneq_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlsq_laneq_s16 - #define vmlsq_laneq_s16(a, b, v, lane) simde_vmlsq_laneq_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmls_lane_s32(a, b, v, lane) vmls_lane_s32((a), (b), (v), (lane)) -#else - #define simde_vmls_lane_s32(a, b, v, lane) simde_vmls_s32((a), (b), simde_vdup_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmls_lane_s32 - #define vmls_lane_s32(a, b, v, lane) simde_vmls_lane_s32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmls_laneq_s32(a, b, v, lane) vmls_laneq_s32((a), (b), (v), (lane)) -#else - #define 
simde_vmls_laneq_s32(a, b, v, lane) simde_vmls_s32((a), (b), simde_vdup_laneq_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmls_laneq_s32 - #define vmls_laneq_s32(a, b, v, lane) simde_vmls_laneq_s32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlsq_laneq_s32(a, b, v, lane) vmlsq_laneq_s32((a), (b), (v), (lane)) -#else - #define simde_vmlsq_laneq_s32(a, b, v, lane) simde_vmlsq_s32((a), (b), simde_vdupq_laneq_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlsq_laneq_s32 - #define vmlsq_laneq_s32(a, b, v, lane) simde_vmlsq_laneq_s32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmls_lane_u16(a, b, v, lane) vmls_lane_u16((a), (b), (v), (lane)) -#else - #define simde_vmls_lane_u16(a, b, v, lane) simde_vmls_u16((a), (b), simde_vdup_lane_u16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmls_lane_u16 - #define vmls_lane_u16(a, b, v, lane) simde_vmls_lane_u16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmls_laneq_u16(a, b, v, lane) vmls_laneq_u16((a), (b), (v), (lane)) -#else - #define simde_vmls_laneq_u16(a, b, v, lane) simde_vmls_u16((a), (b), simde_vdup_laneq_u16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmls_laneq_u16 - #define vmls_laneq_u16(a, b, v, lane) simde_vmls_laneq_u16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlsq_laneq_u16(a, b, v, lane) vmlsq_laneq_u16((a), (b), (v), (lane)) -#else - #define simde_vmlsq_laneq_u16(a, b, v, lane) simde_vmlsq_u16((a), (b), simde_vdupq_laneq_u16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlsq_laneq_u16 - #define vmlsq_laneq_u16(a, b, v, lane) simde_vmlsq_laneq_u16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmls_lane_u32(a, b, v, lane) vmls_lane_u32((a), (b), (v), (lane)) -#else - #define simde_vmls_lane_u32(a, b, v, lane) simde_vmls_u32((a), (b), simde_vdup_lane_u32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmls_lane_u32 - #define vmls_lane_u32(a, b, v, lane) simde_vmls_lane_u32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmls_laneq_u32(a, b, v, lane) vmls_laneq_u32((a), (b), (v), (lane)) -#else - #define simde_vmls_laneq_u32(a, b, v, lane) simde_vmls_u32((a), (b), simde_vdup_laneq_u32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmls_laneq_u32 - #define vmls_laneq_u32(a, b, v, lane) simde_vmls_laneq_u32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmlsq_laneq_u32(a, b, v, lane) vmlsq_laneq_u32((a), (b), (v), (lane)) -#else - #define simde_vmlsq_laneq_u32(a, b, v, lane) simde_vmlsq_u32((a), (b), simde_vdupq_laneq_u32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmlsq_laneq_u32 - #define vmlsq_laneq_u32(a, b, v, lane) simde_vmlsq_laneq_u32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmlsq_lane_f32(a, b, v, lane) vmlsq_lane_f32((a), (b), (v), (lane)) -#else - #define simde_vmlsq_lane_f32(a, b, v, lane) simde_vmlsq_f32((a), (b), simde_vdupq_lane_f32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - 
#undef vmlsq_lane_f32 - #define vmlsq_lane_f32(a, b, v, lane) simde_vmlsq_lane_f32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmlsq_lane_s16(a, b, v, lane) vmlsq_lane_s16((a), (b), (v), (lane)) -#else - #define simde_vmlsq_lane_s16(a, b, v, lane) simde_vmlsq_s16((a), (b), simde_vdupq_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlsq_lane_s16 - #define vmlsq_lane_s16(a, b, v, lane) simde_vmlsq_lane_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmlsq_lane_s32(a, b, v, lane) vmlsq_lane_s32((a), (b), (v), (lane)) -#else - #define simde_vmlsq_lane_s32(a, b, v, lane) simde_vmlsq_s32((a), (b), simde_vdupq_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlsq_lane_s32 - #define vmlsq_lane_s32(a, b, v, lane) simde_vmlsq_lane_s32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmlsq_lane_u16(a, b, v, lane) vmlsq_lane_u16((a), (b), (v), (lane)) -#else - #define simde_vmlsq_lane_u16(a, b, v, lane) simde_vmlsq_u16((a), (b), simde_vdupq_lane_u16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlsq_lane_u16 - #define vmlsq_lane_u16(a, b, v, lane) simde_vmlsq_lane_u16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vmlsq_lane_u32(a, b, v, lane) vmlsq_lane_u32((a), (b), (v), (lane)) -#else - #define simde_vmlsq_lane_u32(a, b, v, lane) simde_vmlsq_u32((a), (b), simde_vdupq_lane_u32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmlsq_lane_u32 - #define vmlsq_lane_u32(a, b, v, lane) simde_vmlsq_lane_u32((a), (b), (v), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MLS_LANE_H) */ -/* :: End simde/arm/neon/mls_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mls_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
[deletion hunks elided: vendored simde/arm/neon headers mls_n.h, mlsl.h, mlsl_high.h, mull_high.h, mlsl_high_lane.h, mlsl_high_n.h, mlsl_lane.h, mlsl_n.h, mull_n.h, mmlaq.h, movn_high.h, mull_high_lane.h, mull_high_n.h, and mull_lane.h -- auto-generated SIMDE amalgamation code with repeated MIT license text]
- #undef vmull_lane_u32 - #define vmull_lane_u32(a, v, lane) simde_vmull_lane_u32((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmull_laneq_s16(a, v, lane) vmull_laneq_s16((a), (v), (lane)) -#else - #define simde_vmull_laneq_s16(a, v, lane) simde_vmull_s16((a), simde_vdup_laneq_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmull_laneq_s16 - #define vmull_laneq_s16(a, v, lane) simde_vmull_laneq_s16((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmull_laneq_s32(a, v, lane) vmull_laneq_s32((a), (v), (lane)) -#else - #define simde_vmull_laneq_s32(a, v, lane) simde_vmull_s32((a), simde_vdup_laneq_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmull_laneq_s32 - #define vmull_laneq_s32(a, v, lane) simde_vmull_laneq_s32((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmull_laneq_u16(a, v, lane) vmull_laneq_u16((a), (v), (lane)) -#else - #define simde_vmull_laneq_u16(a, v, lane) simde_vmull_u16((a), simde_vdup_laneq_u16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmull_laneq_u16 - #define vmull_laneq_u16(a, v, lane) simde_vmull_laneq_u16((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmull_laneq_u32(a, v, lane) vmull_laneq_u32((a), (v), (lane)) -#else - #define simde_vmull_laneq_u32(a, v, lane) simde_vmull_u32((a), simde_vdup_laneq_u32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmull_laneq_u32 - #define vmull_laneq_u32(a, v, lane) simde_vmull_laneq_u32((a), (v), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MULL_LANE_H) */ -/* :: End simde/arm/neon/mull_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mulx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_MULX_H) -#define SIMDE_ARM_NEON_MULX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vmulxh_f16(simde_float16_t a, simde_float16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vmulxh_f16(a, b); - #else - return simde_float16_from_float32( - simde_float16_to_float32(a) * - simde_float16_to_float32(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxh_f16 - #define vmulxh_f16(a, b) simde_vmulxh_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vmulxs_f32(simde_float32_t a, simde_float32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmulxs_f32(a, b); - #else - return a * b; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxs_f32 - #define vmulxs_f32(a, b) simde_vmulxs_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vmulxd_f64(simde_float64_t a, simde_float64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmulxd_f64(a, b); - #else - return a * b; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxd_f64 - #define vmulxd_f64(a, b) simde_vmulxd_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vmulx_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vmulx_f16(a, b); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vmulxh_f16(a_.values[i], b_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulx_f16 - #define vmulx_f16(a, b) simde_vmulx_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vmulx_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmulx_f32(a, b); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values * b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[i]; - } - #endif - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulx_f32 - #define vmulx_f32(a, b) simde_vmulx_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vmulx_f64(simde_float64x1_t a, simde_float64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmulx_f64(a, b); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a), - b_ = simde_float64x1_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values * b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[i]; - } - #endif 
- - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulx_f64 - #define vmulx_f64(a, b) simde_vmulx_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vmulxq_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vmulxq_f16(a, b); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vmulxh_f16(a_.values[i], b_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxq_f16 - #define vmulxq_f16(a, b) simde_vmulxq_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vmulxq_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmulxq_f32(a, b); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[i]; - } - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxq_f32 - #define vmulxq_f32(a, b) simde_vmulxq_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vmulxq_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vmulxq_f64(a, b); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[i]; - } - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxq_f64 - #define vmulxq_f64(a, b) simde_vmulxq_f64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MULX_H) */ -/* :: End simde/arm/neon/mulx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mulx_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_MULX_LANE_H) -#define SIMDE_ARM_NEON_MULX_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vmulxh_lane_f16(simde_float16_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_float16_from_float32( - simde_float16_to_float32(a) * - simde_float16_to_float32(simde_float16x4_to_private(b).values[lane])); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vmulxh_lane_f16(a, b, lane) vmulxh_lane_f16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxh_lane_f16 - #define vmulxh_lane_f16(a, b, lane) simde_vmulxh_lane_f16(a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vmulxs_lane_f32(simde_float32_t a, simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - return a * simde_float32x2_to_private(b).values[lane]; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulxs_lane_f32(a, b, lane) vmulxs_lane_f32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxs_lane_f32 - #define vmulxs_lane_f32(a, b, lane) simde_vmulxs_lane_f32(a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vmulxd_lane_f64(simde_float64_t a, simde_float64x1_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - return a * simde_float64x1_to_private(b).values[lane]; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulxd_lane_f64(a, b, lane) vmulxd_lane_f64((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxd_lane_f64 - #define vmulxd_lane_f64(a, b, lane) simde_vmulxd_lane_f64(a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vmulxh_laneq_f16(simde_float16_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_float16_from_float32( - simde_float16_to_float32(a) * - simde_float16_to_float32(simde_float16x8_to_private(b).values[lane])); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vmulxh_laneq_f16(a, b, lane) vmulxh_laneq_f16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxh_laneq_f16 - #define vmulxh_laneq_f16(a, b, lane) simde_vmulxh_laneq_f16(a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vmulxs_laneq_f32(simde_float32_t a, simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return a * simde_float32x4_to_private(b).values[lane]; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulxs_laneq_f32(a, b, lane) vmulxs_laneq_f32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxs_laneq_f32 - #define vmulxs_laneq_f32(a, b, lane) simde_vmulxs_laneq_f32(a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vmulxd_laneq_f64(simde_float64_t 
a, simde_float64x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - return a * simde_float64x2_to_private(b).values[lane]; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulxd_laneq_f64(a, b, lane) vmulxd_laneq_f64((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxd_laneq_f64 - #define vmulxd_laneq_f64(a, b, lane) simde_vmulxd_laneq_f64(a, b, lane) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vmulx_lane_f16(simde_float16x4_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - simde_float32_t b_lane_ = simde_float16_to_float32(b_.values[lane]); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_float16_from_float32( - simde_float16_to_float32(a_.values[i]) * b_lane_); - } - - return simde_float16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vmulx_lane_f16(a, b, lane) vmulx_lane_f16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulx_lane_f16 - #define vmulx_lane_f16(a, b, lane) simde_vmulx_lane_f16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vmulx_lane_f32(simde_float32x2_t a, simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulx_lane_f32(a, b, lane) vmulx_lane_f32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulx_lane_f32 - #define vmulx_lane_f32(a, b, lane) simde_vmulx_lane_f32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vmulx_lane_f64(simde_float64x1_t a, simde_float64x1_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a), - b_ = simde_float64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulx_lane_f64(a, b, lane) vmulx_lane_f64((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulx_lane_f64 - #define vmulx_lane_f64(a, b, lane) simde_vmulx_lane_f64((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vmulxq_lane_f16(simde_float16x8_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a); - simde_float16x4_private b_ = simde_float16x4_to_private(b); - simde_float32_t b_lane_ = simde_float16_to_float32(b_.values[lane]); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_float16_from_float32( - simde_float16_to_float32(a_.values[i]) * b_lane_); - } - - return simde_float16x8_from_private(r_); -} -#if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vmulxq_lane_f16(a, b, lane) vmulxq_lane_f16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxq_lane_f16 - #define vmulxq_lane_f16(a, b, lane) simde_vmulxq_lane_f16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vmulxq_lane_f32(simde_float32x4_t a, simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - simde_float32x2_private b_ = simde_float32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulxq_lane_f32(a, b, lane) vmulxq_lane_f32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxq_lane_f32 - #define vmulxq_lane_f32(a, b, lane) simde_vmulxq_lane_f32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vmulxq_lane_f64(simde_float64x2_t a, simde_float64x1_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - simde_float64x1_private b_ = simde_float64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float64x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulxq_lane_f64(a, b, lane) vmulxq_lane_f64((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxq_lane_f64 - #define vmulxq_lane_f64(a, b, lane) simde_vmulxq_lane_f64((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vmulxq_laneq_f16(simde_float16x8_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - simde_float32_t b_lane_ = simde_float16_to_float32(b_.values[lane]); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_float16_from_float32( - simde_float16_to_float32(a_.values[i]) * b_lane_); - } - - return simde_float16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vmulxq_laneq_f16(a, b, lane) vmulxq_laneq_f16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxq_laneq_f16 - #define vmulxq_laneq_f16(a, b, lane) simde_vmulxq_laneq_f16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vmulxq_laneq_f32(simde_float32x4_t a, simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulxq_laneq_f32(a, b, lane) vmulxq_laneq_f32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxq_laneq_f32 
- #define vmulxq_laneq_f32(a, b, lane) simde_vmulxq_laneq_f32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vmulxq_laneq_f64(simde_float64x2_t a, simde_float64x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float64x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulxq_laneq_f64(a, b, lane) vmulxq_laneq_f64((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxq_laneq_f64 - #define vmulxq_laneq_f64(a, b, lane) simde_vmulxq_laneq_f64((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vmulx_laneq_f16(simde_float16x4_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a); - simde_float16x8_private b_ = simde_float16x8_to_private(b); - simde_float32_t b_lane_ = simde_float16_to_float32(b_.values[lane]); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_float16_from_float32( - simde_float16_to_float32(a_.values[i]) * b_lane_); - } - - return simde_float16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vmulx_laneq_f16(a, b, lane) vmulx_laneq_f16((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulx_laneq_f16 - #define vmulx_laneq_f16(a, b, lane) simde_vmulx_laneq_f16((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vmulx_laneq_f32(simde_float32x2_t a, simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - simde_float32x4_private b_ = simde_float32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulx_laneq_f32(a, b, lane) vmulx_laneq_f32((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulx_laneq_f32 - #define vmulx_laneq_f32(a, b, lane) simde_vmulx_laneq_f32((a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vmulx_laneq_f64(simde_float64x1_t a, simde_float64x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - simde_float64x2_private b_ = simde_float64x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; - } - - return simde_float64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vmulx_laneq_f64(a, b, lane) vmulx_laneq_f64((a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulx_laneq_f64 - #define vmulx_laneq_f64(a, b, lane) simde_vmulx_laneq_f64((a), (b), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MULX_LANE_H) */ -/* :: End 
simde/arm/neon/mulx_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/mulx_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_MULX_N_H) -#define SIMDE_ARM_NEON_MULX_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vmulx_n_f16(simde_float16x4_t a, simde_float16 b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vmulx_n_f16(a, b); - #else - return simde_vmul_f16(a, simde_vdup_n_f16(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulx_n_f16 - #define vmulx_n_f16(a, b) simde_vmulx_n_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vmulxq_n_f16(simde_float16x8_t a, simde_float16 b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vmulxq_n_f16(a, b); - #else - return simde_vmulq_f16(a, simde_vdupq_n_f16(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vmulxq_n_f16 - #define vmulxq_n_f16(a, b) simde_vmulxq_n_f16((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_MULX_N_H) */ -/* :: End simde/arm/neon/mulx_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/orn.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the 
Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_NEON_ORN_H) -#define SIMDE_ARM_NEON_ORN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/orr.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_ARM_NEON_ORR_H) -#define SIMDE_ARM_NEON_ORR_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vorr_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorr_s8(a, b); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_or_si64(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorr_s8 - #define vorr_s8(a, b) simde_vorr_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vorr_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorr_s16(a, b); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_or_si64(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorr_s16 - #define vorr_s16(a, b) simde_vorr_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vorr_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorr_s32(a, b); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_or_si64(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorr_s32 - #define vorr_s32(a, b) simde_vorr_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vorr_s64(simde_int64x1_t a, simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorr_s64(a, b); - #else - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_or_si64(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorr_s64 - #define vorr_s64(a, b) simde_vorr_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vorr_u8(simde_uint8x8_t a, 
simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorr_u8(a, b); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_or_si64(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorr_u8 - #define vorr_u8(a, b) simde_vorr_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vorr_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorr_u16(a, b); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_or_si64(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorr_u16 - #define vorr_u16(a, b) simde_vorr_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vorr_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorr_u32(a, b); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_or_si64(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorr_u32 - #define vorr_u32(a, b) simde_vorr_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vorr_u64(simde_uint64x1_t a, simde_uint64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorr_u64(a, b); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_or_si64(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorr_u64 - #define vorr_u64(a, b) simde_vorr_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vorrq_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorrq_s8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_or(a, b); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_or(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorrq_s8 - #define vorrq_s8(a, b) simde_vorrq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vorrq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorrq_s16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_or(a, b); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_or(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorrq_s16 - #define vorrq_s16(a, b) simde_vorrq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vorrq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorrq_s32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_or(a, b); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_or(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorrq_s32 - #define vorrq_s32(a, b) simde_vorrq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vorrq_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorrq_s64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_or(a, b); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_or(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorrq_s64 - #define vorrq_s64(a, b) simde_vorrq_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vorrq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return 
vorrq_u8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_or(a, b); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_or(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorrq_u8 - #define vorrq_u8(a, b) simde_vorrq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vorrq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorrq_u16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_or(a, b); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_or(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorrq_u16 - #define vorrq_u16(a, b) simde_vorrq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vorrq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorrq_u32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_or(a, b); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_or(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vorrq_u32 - #define vorrq_u32(a, b) simde_vorrq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vorrq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vorrq_u64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_or(a, b); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_or(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = a_.values | b_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] | b_.values[i]; - } - #endif - - return 
[The diff next deletes, verbatim, the remainder of the vendored auto-generated SIMDE NEON polyfill bundle. The stretch covered here removes:
  simde/arm/neon/orr.h      (tail of vorrq_u64; bitwise OR)
  simde/arm/neon/orn.h      (vorn / vornq; bitwise OR-NOT, a | ~b)
  simde/arm/neon/padal.h    (vpadal / vpadalq; pairwise add and accumulate long)
  simde/arm/neon/pmax.h     (vpmax / vpmaxq; pairwise maximum)
  simde/arm/neon/pmaxnm.h   (vpmaxnm / vpmaxnmq; NaN-aware pairwise maximum)
  simde/arm/neon/pmin.h     (vpmin / vpminq; pairwise minimum)
  simde/arm/neon/pminnm.h   (vpminnm / vpminnmq; NaN-aware pairwise minimum)
  simde/arm/neon/qabs.h     (vqabs / vqabsq; saturating absolute value)
  simde/arm/neon/qadd.h     (vqadd / vqaddq; saturating addition; its deletion continues below)
Each header repeats the same MIT license block and the marker "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY"; none of this third-party code is edited, only removed together with the SIMDE dependency.]
simde_uint64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqadd_u64(a, b); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) - r_.values = a_.values + b_.values; - r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqaddd_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqadd_u64 - #define vqadd_u64(a, b) simde_vqadd_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vqaddq_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqaddq_s8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6) - return vec_adds(a, b); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_add_sat(a_.v128, b_.v128); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_adds_epi8(a_.m128i, b_.m128i); - #elif defined(SIMDE_VECTOR_SCALAR) - uint8_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); - uint8_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); - uint8_t ru SIMDE_VECTOR(16) = au + bu; - - au = (au >> 7) + INT8_MAX; - - uint8_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqaddb_s8(a_.values[i], b_.values[i]); - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqaddq_s8 - #define vqaddq_s8(a, b) simde_vqaddq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vqaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqaddq_s16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6) - return vec_adds(a, b); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_add_sat(a_.v128, b_.v128); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_adds_epi16(a_.m128i, b_.m128i); - #elif defined(SIMDE_VECTOR_SCALAR) - uint16_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); - uint16_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); - uint16_t ru SIMDE_VECTOR(16) = au + bu; - - au = (au >> 15) + INT16_MAX; - - uint16_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqaddh_s16(a_.values[i], b_.values[i]); - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqaddq_s16 - #define vqaddq_s16(a, b) simde_vqaddq_s16((a), 
(b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqaddq_s32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6) - return vec_adds(a, b); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - /* https://stackoverflow.com/a/56544654/501126 */ - const __m128i int_max = _mm_set1_epi32(INT32_MAX); - - /* normal result (possibly wraps around) */ - const __m128i sum = _mm_add_epi32(a_.m128i, b_.m128i); - - /* If result saturates, it has the same sign as both a and b */ - const __m128i sign_bit = _mm_srli_epi32(a_.m128i, 31); /* shift sign to lowest bit */ - - #if defined(SIMDE_X86_AVX512VL_NATIVE) - const __m128i overflow = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, sum, 0x42); - #else - const __m128i sign_xor = _mm_xor_si128(a_.m128i, b_.m128i); - const __m128i overflow = _mm_andnot_si128(sign_xor, _mm_xor_si128(a_.m128i, sum)); - #endif - - #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r_.m128i = _mm_mask_add_epi32(sum, _mm_movepi32_mask(overflow), int_max, sign_bit); - #else - const __m128i saturated = _mm_add_epi32(int_max, sign_bit); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = - _mm_castps_si128( - _mm_blendv_ps( - _mm_castsi128_ps(sum), - _mm_castsi128_ps(saturated), - _mm_castsi128_ps(overflow) - ) - ); - #else - const __m128i overflow_mask = _mm_srai_epi32(overflow, 31); - r_.m128i = - _mm_or_si128( - _mm_and_si128(overflow_mask, saturated), - _mm_andnot_si128(overflow_mask, sum) - ); - #endif - #endif - #elif defined(SIMDE_VECTOR_SCALAR) - uint32_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); - uint32_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); - uint32_t ru SIMDE_VECTOR(16) = au + bu; - - au = (au >> 31) + INT32_MAX; - - uint32_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqadds_s32(a_.values[i], b_.values[i]); - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqaddq_s32 - #define vqaddq_s32(a, b) simde_vqaddq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqaddq_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqaddq_s64(a, b); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - /* https://stackoverflow.com/a/56544654/501126 */ - const __m128i int_max = _mm_set1_epi64x(INT64_MAX); - - /* normal result (possibly wraps around) */ - const __m128i sum = _mm_add_epi64(a_.m128i, b_.m128i); - - /* If result saturates, it has the same sign as both a and b */ - const __m128i sign_bit = _mm_srli_epi64(a_.m128i, 63); /* shift sign to lowest bit */ - - #if defined(SIMDE_X86_AVX512VL_NATIVE) - const __m128i overflow = _mm_ternarylogic_epi64(a_.m128i, b_.m128i, sum, 0x42); - #else - const __m128i sign_xor = _mm_xor_si128(a_.m128i, b_.m128i); - const __m128i overflow = _mm_andnot_si128(sign_xor, _mm_xor_si128(a_.m128i, sum)); - #endif - 
- #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - r_.m128i = _mm_mask_add_epi64(sum, _mm_movepi64_mask(overflow), int_max, sign_bit); - #else - const __m128i saturated = _mm_add_epi64(int_max, sign_bit); - - r_.m128i = - _mm_castpd_si128( - _mm_blendv_pd( - _mm_castsi128_pd(sum), - _mm_castsi128_pd(saturated), - _mm_castsi128_pd(overflow) - ) - ); - #endif - #elif defined(SIMDE_VECTOR_SCALAR) - uint64_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); - uint64_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); - uint64_t ru SIMDE_VECTOR(16) = au + bu; - - au = (au >> 63) + INT64_MAX; - - uint64_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqaddd_s64(a_.values[i], b_.values[i]); - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqaddq_s64 - #define vqaddq_s64(a, b) simde_vqaddq_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vqaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqaddq_u8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6) - return vec_adds(a, b); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u8x16_add_sat(a_.v128, b_.v128); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_adds_epu8(a_.m128i, b_.m128i); - #elif defined(SIMDE_VECTOR_SUBSCRIPT) - r_.values = a_.values + b_.values; - r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqaddb_u8(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqaddq_u8 - #define vqaddq_u8(a, b) simde_vqaddq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vqaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqaddq_u16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6) - return vec_adds(a, b); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u16x8_add_sat(a_.v128, b_.v128); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_adds_epu16(a_.m128i, b_.m128i); - #elif defined(SIMDE_VECTOR_SUBSCRIPT) - r_.values = a_.values + b_.values; - r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqaddh_u16(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqaddq_u16 - #define vqaddq_u16(a, b) simde_vqaddq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vqaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqaddq_u32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6) - return vec_adds(a, b); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(__AVX512VL__) - __m128i notb = _mm_ternarylogic_epi32(b_.m128i, b_.m128i, b_.m128i, 0x0f); - #else - __m128i notb = _mm_xor_si128(b_.m128i, _mm_set1_epi32(~INT32_C(0))); - #endif - r_.m128i = - _mm_add_epi32( - b_.m128i, - _mm_min_epu32( - a_.m128i, - notb - ) - ); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i sum = _mm_add_epi32(a_.m128i, b_.m128i); - const __m128i i32min = _mm_set1_epi32(INT32_MIN); - a_.m128i = _mm_xor_si128(a_.m128i, i32min); - r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(a_.m128i, _mm_xor_si128(i32min, sum)), sum); - #elif defined(SIMDE_VECTOR_SUBSCRIPT) - r_.values = a_.values + b_.values; - r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqadds_u32(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqaddq_u32 - #define vqaddq_u32(a, b) simde_vqaddq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vqaddq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqaddq_u64(a, b); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) - r_.values = a_.values + b_.values; - r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqaddd_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqaddq_u64 - #define vqaddq_u64(a, b) simde_vqaddq_u64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QADD_H) */ -/* :: End simde/arm/neon/qadd.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qdmlal.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QDMLAL_H) -#define SIMDE_ARM_NEON_QDMLAL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqdmlalh_s16(a, b, c); - #else - return HEDLEY_STATIC_CAST(int32_t, b) * HEDLEY_STATIC_CAST(int32_t, c) * 2 + a; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlalh_s16 - #define vqdmlalh_s16(a, b, c) simde_vqdmlalh_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vqdmlals_s32(int64_t a, int32_t b, int32_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqdmlals_s32(a, b, c); - #else - return HEDLEY_STATIC_CAST(int64_t, b) * HEDLEY_STATIC_CAST(int64_t, c) * 2 + a; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlals_s32 - #define vqdmlals_s32(a, b, c) simde_vqdmlals_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqdmlal_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqdmlal_s16(a, b, c); - #else - return simde_vaddq_s32(simde_vmulq_n_s32(simde_vmulq_s32(simde_vmovl_s16(b), simde_vmovl_s16(c)), 2), a); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_s16 - #define vqdmlal_s16(a, b, c) simde_vqdmlal_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqdmlal_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqdmlal_s32(a, b, c); - #else - simde_int64x2_private r_ = simde_int64x2_to_private( - simde_x_vmulq_s64( - simde_vmovl_s32(b), - simde_vmovl_s32(c))); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); - } - - return simde_vaddq_s64(a, simde_int64x2_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_s32 - #define vqdmlal_s32(a, b, c) simde_vqdmlal_s32((a), (b), (c)) -#endif - - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QDMLAL_H) */ -/* :: End simde/arm/neon/qdmlal.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qdmlal_high.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the 
Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QDMLAL_HIGH_H) -#define SIMDE_ARM_NEON_QDMLAL_HIGH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqdmlal_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqdmlal_high_s16(a, b, c); - #else - return simde_vaddq_s32( - simde_vmulq_n_s32( - simde_vmulq_s32( - simde_vmovl_high_s16(b), simde_vmovl_high_s16(c)), 2), a); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_high_s16 - #define vqdmlal_high_s16(a, b, c) simde_vqdmlal_high_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqdmlal_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqdmlal_high_s32(a, b, c); - #else - simde_int64x2_private r_ = simde_int64x2_to_private( - simde_x_vmulq_s64( - simde_vmovl_high_s32(b), - simde_vmovl_high_s32(c))); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); - } - - return simde_vaddq_s64(a, simde_int64x2_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_high_s32 - #define vqdmlal_high_s32(a, b, c) simde_vqdmlal_high_s32((a), (b), (c)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QDMLAL_HIGH_H) */ -/* :: End simde/arm/neon/qdmlal_high.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qdmlal_high_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons 
to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QDMLAL_HIGH_LANE_H) -#define SIMDE_ARM_NEON_QDMLAL_HIGH_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqdmlal_high_lane_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vaddq_s32( - simde_vmulq_n_s32( - simde_vmulq_s32( - simde_vmovl_high_s16(b), - simde_vmovl_high_s16(simde_vdupq_n_s16(simde_int16x4_to_private(v).values[lane]))), 2), a); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqdmlal_high_lane_s16(a, b, v, lane) vqdmlal_high_lane_s16(a, b, v, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_high_lane_s16 - #define vqdmlal_high_lane_s16(a, b, v, lane) simde_vqdmlal_high_lane_s16((a), (b), (v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqdmlal_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_vaddq_s32( - simde_vmulq_n_s32( - simde_vmulq_s32( - simde_vmovl_high_s16(b), - simde_vmovl_high_s16(simde_vdupq_n_s16(simde_int16x8_to_private(v).values[lane]))), 2), a); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqdmlal_high_laneq_s16(a, b, v, lane) vqdmlal_high_laneq_s16(a, b, v, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_high_laneq_s16 - #define vqdmlal_high_laneq_s16(a, b, v, lane) simde_vqdmlal_high_laneq_s16((a), (b), (v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqdmlal_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int64x2_private r_ = simde_int64x2_to_private( - simde_x_vmulq_s64( - simde_vmovl_high_s32(b), - simde_vmovl_high_s32(simde_vdupq_n_s32(simde_int32x2_to_private(v).values[lane])))); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); - } - - return simde_vaddq_s64(a, simde_int64x2_from_private(r_)); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqdmlal_high_lane_s32(a, b, v, lane) vqdmlal_high_lane_s32(a, b, v, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_high_lane_s32 - #define vqdmlal_high_lane_s32(a, b, v, lane) simde_vqdmlal_high_lane_s32((a), (b), (v), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqdmlal_high_laneq_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int64x2_private r_ = simde_int64x2_to_private( - simde_x_vmulq_s64( - simde_vmovl_high_s32(b), - simde_vmovl_high_s32(simde_vdupq_n_s32(simde_int32x4_to_private(v).values[lane])))); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); - } - - return simde_vaddq_s64(a, simde_int64x2_from_private(r_)); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqdmlal_high_laneq_s32(a, b, v, lane) vqdmlal_high_laneq_s32(a, b, v, lane) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_high_laneq_s32 - #define vqdmlal_high_laneq_s32(a, b, v, lane) simde_vqdmlal_high_laneq_s32((a), (b), (v), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QDMLAL_HIGH_LANE_H) */ -/* :: End simde/arm/neon/qdmlal_high_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qdmlal_high_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QDMLAL_HIGH_N_H) -#define SIMDE_ARM_NEON_QDMLAL_HIGH_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqdmlal_high_n_s16(simde_int32x4_t a, simde_int16x8_t b, int16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqdmlal_high_n_s16(a, b, c); - #else - return simde_vaddq_s32( - simde_vmulq_n_s32( - simde_vmulq_s32( - simde_vmovl_high_s16(b), - simde_vmovl_high_s16(simde_vdupq_n_s16(c))), 2), a); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_high_n_s16 - #define vqdmlal_high_n_s16(a, b, c) simde_vqdmlal_high_n_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqdmlal_high_n_s32(simde_int64x2_t a, simde_int32x4_t b, int32_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqdmlal_high_n_s32(a, b, c); - #else - simde_int64x2_private r_ = simde_int64x2_to_private( - simde_x_vmulq_s64( - simde_vmovl_high_s32(b), - simde_vmovl_high_s32(simde_vdupq_n_s32(c)))); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); - } - - return simde_vaddq_s64(a, simde_int64x2_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_high_n_s32 - #define vqdmlal_high_n_s32(a, b, c) simde_vqdmlal_high_n_s32((a), (b), (c)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QDMLAL_HIGH_N_H) */ -/* :: End simde/arm/neon/qdmlal_high_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qdmlal_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QDMLAL_LANE_H) -#define SIMDE_ARM_NEON_QDMLAL_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqdmlal_lane_s16(a, b, v, lane) vqdmlal_lane_s16((a), (b), (v), (lane)) -#else - #define simde_vqdmlal_lane_s16(a, b, v, lane) simde_vqdmlal_s16((a), (b), simde_vdup_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_lane_s16 - #define vqdmlal_lane_s16(a, b, c, lane) simde_vqdmlal_lane_s16((a), (b), (c), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqdmlal_lane_s32(a, b, v, lane) vqdmlal_lane_s32((a), (b), (v), (lane)) -#else - #define simde_vqdmlal_lane_s32(a, b, v, lane) simde_vqdmlal_s32((a), (b), simde_vdup_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_lane_s32 - #define vqdmlal_lane_s32(a, b, c, lane) simde_vqdmlal_lane_s32((a), (b), (c), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqdmlal_laneq_s16(a, b, v, lane) vqdmlal_laneq_s16((a), (b), (v), (lane)) -#else - #define simde_vqdmlal_laneq_s16(a, b, v, lane) simde_vqdmlal_s16((a), (b), simde_vdup_laneq_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_laneq_s16 - #define vqdmlal_laneq_s16(a, b, c, lane) simde_vqdmlal_laneq_s16((a), (b), (c), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqdmlal_laneq_s32(a, b, v, lane) vqdmlal_laneq_s32((a), (b), (v), (lane)) -#else - #define simde_vqdmlal_laneq_s32(a, b, v, lane) simde_vqdmlal_s32((a), (b), simde_vdup_laneq_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_laneq_s32 - #define vqdmlal_laneq_s32(a, b, c, lane) simde_vqdmlal_laneq_s32((a), (b), (c), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqdmlalh_lane_s16(a, b, v, lane) vqdmlalh_lane_s16((a), (b), (v), (lane)) -#else - #define simde_vqdmlalh_lane_s16(a, b, v, lane) simde_vqdmlalh_s16((a), (b), simde_vget_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlalh_lane_s16 - #define vqdmlalh_lane_s16(a, b, c, lane) simde_vqdmlalh_lane_s16((a), (b), (c), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqdmlalh_laneq_s16(a, b, v, lane) vqdmlalh_laneq_s16((a), (b), (v), (lane)) -#else - #define simde_vqdmlalh_laneq_s16(a, b, v, lane) simde_vqdmlalh_s16((a), (b), simde_vgetq_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlalh_laneq_s16 - #define vqdmlalh_laneq_s16(a, b, c, lane) simde_vqdmlalh_laneq_s16((a), (b), 
(c), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqdmlals_lane_s32(a, b, v, lane) vqdmlals_lane_s32((a), (b), (v), (lane)) -#else - #define simde_vqdmlals_lane_s32(a, b, v, lane) simde_vqdmlals_s32((a), (b), simde_vget_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlals_lane_s32 - #define vqdmlals_lane_s32(a, b, c, lane) simde_vqdmlals_lane_s32((a), (b), (c), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqdmlals_laneq_s32(a, b, v, lane) vqdmlals_laneq_s32((a), (b), (v), (lane)) -#else - #define simde_vqdmlals_laneq_s32(a, b, v, lane) simde_vqdmlals_s32((a), (b), simde_vgetq_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlals_laneq_s32 - #define vqdmlals_laneq_s32(a, b, c, lane) simde_vqdmlals_laneq_s32((a), (b), (c), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QDMLAL_LANE_H) */ -/* :: End simde/arm/neon/qdmlal_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qdmlal_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QDMLAL_N_H) -#define SIMDE_ARM_NEON_QDMLAL_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqdmlal_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqdmlal_n_s16(a, b, c); - #else - return simde_vqdmlal_s16(a, b, simde_vdup_n_s16(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_n_s16 - #define vqdmlal_n_s16(a, b, c) simde_vqdmlal_n_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqdmlal_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqdmlal_n_s32(a, b, c); - #else - return simde_vqdmlal_s32(a, b, simde_vdup_n_s32(c)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqdmlal_n_s32 - #define vqdmlal_n_s32(a, b, c) simde_vqdmlal_n_s32((a), (b), (c)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QDMLAL_N_H) */ -/* :: End simde/arm/neon/qdmlal_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qdmlsl.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QDMLSL_H) -#define SIMDE_ARM_NEON_QDMLSL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqdmlslh_s16(a, b, c); - #else - return a - HEDLEY_STATIC_CAST(int32_t, b) * HEDLEY_STATIC_CAST(int32_t, c) * 2; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlslh_s16 - #define vqdmlslh_s16(a, b, c) simde_vqdmlslh_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqdmlsls_s32(a, b, c); - #else - return a - HEDLEY_STATIC_CAST(int64_t, b) * HEDLEY_STATIC_CAST(int64_t, c) * 2; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqdmlsls_s32 - #define vqdmlsls_s32(a, b, c) simde_vqdmlsls_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqdmlsl_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqdmlsl_s16(a, b, c); - #else - return simde_vsubq_s32(a, simde_vmulq_n_s32(simde_vmulq_s32(simde_vmovl_s16(b), simde_vmovl_s16(c)), 2)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqdmlsl_s16 - #define vqdmlsl_s16(a, b, c) simde_vqdmlsl_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqdmlsl_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqdmlsl_s32(a, b, c); - #else - simde_int64x2_private r_ = simde_int64x2_to_private( - simde_x_vmulq_s64( - simde_vmovl_s32(b), - simde_vmovl_s32(c))); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); - } - - return simde_vsubq_s64(a, simde_int64x2_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqdmlsl_s32 - #define vqdmlsl_s32(a, b, c) simde_vqdmlsl_s32((a), (b), (c)) -#endif - - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QDMLSL_H) */ -/* :: End simde/arm/neon/qdmlsl.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qdmlsl_high.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following 
[The remainder of this hunk continues the deletion of the bundled simde NEON polyfill headers, inlined in the vendored simde amalgamation as simde/arm/neon/qdmlsl_high.h, qdmlsl_high_lane.h, qdmlsl_high_n.h, qdmlsl_lane.h, qdmlsl_n.h, qdmulh.h (with nested qdmull.h), qdmulh_lane.h (with nested qdmulh_n.h), qdmull_high.h, qdmull_high_lane.h, qdmull_high_n.h, qdmull_lane.h, qdmull_n.h, and the opening of qrdmlah.h with its nested qmovn.h. Every removed section has the same shape: the simde MIT license block, repeated "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY" markers, and portable fallback implementations of the NEON saturating doubling multiply (vqdmull*, vqdmulh*), saturating doubling multiply-subtract (vqdmlsl*), and saturating narrowing (vqmovn*) intrinsics in their scalar, vector, _high, _lane, _laneq, and _n variants. The only explanatory text in the removed code is simde's implementation note on vqdmull: the widening multiply itself cannot overflow (it at most doubles the bit width), so the polyfills multiply first and apply saturation only to the final doubling step.]
-SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vqmovn_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqmovn_u16(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmovn_u16(simde_vminq_u16(a, simde_vdupq_n_u16(UINT8_MAX))); - #else - simde_uint8x8_private r_; - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqmovnh_u16(a_.values[i]); - } - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqmovn_u16 - #define vqmovn_u16(a) simde_vqmovn_u16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vqmovn_u32(simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqmovn_u32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmovn_u32(simde_vminq_u32(a, simde_vdupq_n_u32(UINT16_MAX))); - #else - simde_uint16x4_private r_; - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqmovns_u32(a_.values[i]); - } - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqmovn_u32 - #define vqmovn_u32(a) simde_vqmovn_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vqmovn_u64(simde_uint64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqmovn_u64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmovn_u64(simde_x_vminq_u64(a, simde_vdupq_n_u64(UINT32_MAX))); - #else - simde_uint32x2_private r_; - simde_uint64x2_private a_ = simde_uint64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqmovnd_u64(a_.values[i]); - } - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqmovn_u64 - #define vqmovn_u64(a) simde_vqmovn_u64((a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QMOVN_H) */ -/* :: End simde/arm/neon/qmovn.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - return SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vqrdmlahh_s16(a, b, c)); - #else - return vqrdmlahh_s16(a, b, c); - #endif - #else - int64_t r = (((1 << 15) + (HEDLEY_STATIC_CAST(int64_t, a) << 16) + ((HEDLEY_STATIC_CAST(int64_t, (HEDLEY_STATIC_CAST(int64_t, b) * HEDLEY_STATIC_CAST(int64_t, c)))) << 1)) >> 16); - return simde_vqmovns_s32(HEDLEY_STATIC_CAST(int32_t, r)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlahh_s16 - #define vqrdmlahh_s16(a, b, c) simde_vqrdmlahh_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - return vqrdmlahs_s32(a, b, c); - #else - int64_t round_const = (HEDLEY_STATIC_CAST(int64_t, 1) << 31); - int64_t a_ = (HEDLEY_STATIC_CAST(int64_t, a) << 32); - int64_t sum = round_const + a_; - int64_t mul = (HEDLEY_STATIC_CAST(int64_t, b) 
* HEDLEY_STATIC_CAST(int64_t, c)); - int64_t mul2 = mul << 1; - if (mul2 >> 1 != mul) { - if (mul > 0) return INT32_MAX; - else if (mul < 0) return INT32_MIN; - } - int64_t sum2 = sum + mul2; - if (sum > 0 && INT64_MAX - sum < mul2) return INT32_MAX; - if (sum < 0 && INT64_MIN - sum > mul2) return INT32_MIN; - return HEDLEY_STATIC_CAST(int32_t, ((sum2 >> 32) & 0xffffffff)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlahs_s32 - #define vqrdmlahs_s32(a, b, c) simde_vqrdmlahs_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vqrdmlah_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - return vqrdmlah_s16(a, b, c); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b), - c_ = simde_int16x4_to_private(c); - - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrdmlahh_s16(a_.values[i], b_.values[i], c_.values[i]); - } - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlah_s16 - #define vqrdmlah_s16(a, b, c) simde_vqrdmlah_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vqrdmlah_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - return vqrdmlah_s32(a, b, c); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b), - c_ = simde_int32x2_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrdmlahs_s32(a_.values[i], b_.values[i], c_.values[i]); - } - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlah_s32 - #define vqrdmlah_s32(a, b, c) simde_vqrdmlah_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vqrdmlahq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - return vqrdmlahq_s16(a, b, c); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b), - c_ = simde_int16x8_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrdmlahh_s16(a_.values[i], b_.values[i], c_.values[i]); - } - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlahq_s16 - #define vqrdmlahq_s16(a, b, c) simde_vqrdmlahq_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqrdmlahq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - return vqrdmlahq_s32(a, b, c); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b), - c_ = simde_int32x4_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrdmlahs_s32(a_.values[i], b_.values[i], c_.values[i]); - } - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef 
vqrdmlahq_s32 - #define vqrdmlahq_s32(a, b, c) simde_vqrdmlahq_s32((a), (b), (c)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QRDMLAH_H) */ -/* :: End simde/arm/neon/qrdmlah.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qrdmlah_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QRDMLAH_LANE_H) -#define SIMDE_ARM_NEON_QRDMLAH_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlahh_lane_s16(a, b, v, lane) vqrdmlahh_lane_s16((a), (b), (v), (lane)) -#else - #define simde_vqrdmlahh_lane_s16(a, b, v, lane) simde_vqrdmlahh_s16((a), (b), simde_vget_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlahh_lane_s16 - #define vqrdmlahh_lane_s16(a, b, v, lane) simde_vqrdmlahh_lane_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlahh_laneq_s16(a, b, v, lane) vqrdmlahh_laneq_s16((a), (b), (v), (lane)) -#else - #define simde_vqrdmlahh_laneq_s16(a, b, v, lane) simde_vqrdmlahh_s16((a), (b), simde_vgetq_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlahh_laneq_s16 - #define vqrdmlahh_laneq_s16(a, b, v, lane) simde_vqrdmlahh_laneq_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlahs_lane_s32(a, b, v, lane) vqrdmlahs_lane_s32((a), (b), (v), (lane)) -#else - #define simde_vqrdmlahs_lane_s32(a, b, v, lane) simde_vqrdmlahs_s32((a), (b), simde_vget_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlahs_lane_s32 - #define vqrdmlahs_lane_s32(a, b, v, 
lane) simde_vqrdmlahs_lane_s32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlahs_laneq_s32(a, b, v, lane) vqrdmlahs_laneq_s32((a), (b), (v), (lane)) -#else - #define simde_vqrdmlahs_laneq_s32(a, b, v, lane) simde_vqrdmlahs_s32((a), (b), simde_vgetq_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlahs_laneq_s32 - #define vqrdmlahs_laneq_s32(a, b, v, lane) simde_vqrdmlahs_laneq_s32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlah_lane_s16(a, b, v, lane) vqrdmlah_lane_s16((a), (b), (v), (lane)) -#else - #define simde_vqrdmlah_lane_s16(a, b, v, lane) simde_vqrdmlah_s16((a), (b), simde_vdup_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmlah_lane_s16 - #define vqrdmlah_lane_s16(a, b, v, lane) simde_vqrdmlah_lane_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlah_lane_s32(a, b, v, lane) vqrdmlah_lane_s32((a), (b), (v), (lane)) -#else - #define simde_vqrdmlah_lane_s32(a, b, v, lane) simde_vqrdmlah_s32((a), (b), simde_vdup_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmlah_lane_s32 - #define vqrdmlah_lane_s32(a, b, v, lane) simde_vqrdmlah_lane_s32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlahq_lane_s16(a, b, v, lane) vqrdmlahq_lane_s16((a), (b), (v), (lane)) -#else - #define simde_vqrdmlahq_lane_s16(a, b, v, lane) simde_vqrdmlahq_s16((a), (b), simde_vdupq_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmlahq_lane_s16 - #define vqrdmlahq_lane_s16(a, b, v, lane) simde_vqrdmlahq_lane_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlahq_lane_s32(a, b, v, lane) vqrdmlahq_lane_s32((a), (b), (v), (lane)) -#else - #define simde_vqrdmlahq_lane_s32(a, b, v, lane) simde_vqrdmlahq_s32((a), (b), simde_vdupq_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmlahq_lane_s32 - #define vqrdmlahq_lane_s32(a, b, v, lane) simde_vqrdmlahq_lane_s32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlah_laneq_s16(a, b, v, lane) vqrdmlah_laneq_s16((a), (b), (v), (lane)) -#else - #define simde_vqrdmlah_laneq_s16(a, b, v, lane) simde_vqrdmlah_s16((a), (b), simde_vdup_laneq_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlah_laneq_s16 - #define vqrdmlah_laneq_s16(a, b, v, lane) simde_vqrdmlah_laneq_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlah_laneq_s32(a, b, v, lane) vqrdmlah_laneq_s32((a), (b), (v), (lane)) -#else - #define simde_vqrdmlah_laneq_s32(a, b, v, lane) simde_vqrdmlah_s32((a), (b), simde_vdup_laneq_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlah_laneq_s32 - #define vqrdmlah_laneq_s32(a, b, v, lane) simde_vqrdmlah_laneq_s32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlahq_laneq_s16(a, 
b, v, lane) vqrdmlahq_laneq_s16((a), (b), (v), (lane)) -#else - #define simde_vqrdmlahq_laneq_s16(a, b, v, lane) simde_vqrdmlahq_s16((a), (b), simde_vdupq_laneq_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlahq_laneq_s16 - #define vqrdmlahq_laneq_s16(a, b, v, lane) simde_vqrdmlahq_laneq_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlahq_laneq_s32(a, b, v, lane) vqrdmlahq_laneq_s32((a), (b), (v), (lane)) -#else - #define simde_vqrdmlahq_laneq_s32(a, b, v, lane) simde_vqrdmlahq_s32((a), (b), simde_vdupq_laneq_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlahq_laneq_s32 - #define vqrdmlahq_laneq_s32(a, b, v, lane) simde_vqrdmlahq_laneq_s32((a), (b), (v), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QRDMLAH_LANE_H) */ -/* :: End simde/arm/neon/qrdmlah_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qrdmlsh.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QRDMLSH_H) -#define SIMDE_ARM_NEON_QRDMLSH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - return SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vqrdmlshh_s16(a, b, c)); - #else - return vqrdmlshh_s16(a, b, c); - #endif - #else - int64_t r = (((1 << 15) + (HEDLEY_STATIC_CAST(int64_t, a) << 16) - ((HEDLEY_STATIC_CAST(int64_t, (HEDLEY_STATIC_CAST(int64_t, b) * HEDLEY_STATIC_CAST(int64_t, c)))) << 1)) >> 16); - return simde_vqmovns_s32(HEDLEY_STATIC_CAST(int32_t, r)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlshh_s16 - #define vqrdmlshh_s16(a, b, c) simde_vqrdmlshh_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - return vqrdmlshs_s32(a, b, c); - #else - int64_t round_const = (HEDLEY_STATIC_CAST(int64_t, 1) << 31); - int64_t a_ = (HEDLEY_STATIC_CAST(int64_t, a) << 32); - int64_t sum = round_const + a_; - int64_t mul = -(HEDLEY_STATIC_CAST(int64_t, b) * HEDLEY_STATIC_CAST(int64_t, c)); - int64_t mul2 = mul << 1; - if (mul2 >> 1 != mul) { - if (mul > 0) return INT32_MAX; - else if (mul < 0) return INT32_MIN; - } - int64_t sum2 = sum + mul2; - if (sum > 0 && INT64_MAX - sum < mul2) return INT32_MAX; - if (sum < 0 && INT64_MIN - sum > mul2) return INT32_MIN; - return HEDLEY_STATIC_CAST(int32_t, ((sum2 >> 32) & 0xffffffff)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlshs_s32 - #define vqrdmlshs_s32(a, b, c) simde_vqrdmlshs_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vqrdmlsh_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - return vqrdmlsh_s16(a, b, c); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b), - c_ = simde_int16x4_to_private(c); - - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrdmlshh_s16(a_.values[i], b_.values[i], c_.values[i]); - } - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlsh_s16 - #define vqrdmlsh_s16(a, b, c) simde_vqrdmlsh_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vqrdmlsh_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - return vqrdmlsh_s32(a, b, c); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b), - c_ = simde_int32x2_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrdmlshs_s32(a_.values[i], b_.values[i], c_.values[i]); - } - - 
return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlsh_s32 - #define vqrdmlsh_s32(a, b, c) simde_vqrdmlsh_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vqrdmlshq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - return vqrdmlshq_s16(a, b, c); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b), - c_ = simde_int16x8_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrdmlshh_s16(a_.values[i], b_.values[i], c_.values[i]); - } - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlshq_s16 - #define vqrdmlshq_s16(a, b, c) simde_vqrdmlshq_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqrdmlshq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - return vqrdmlshq_s32(a, b, c); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b), - c_ = simde_int32x4_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrdmlshs_s32(a_.values[i], b_.values[i], c_.values[i]); - } - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlshq_s32 - #define vqrdmlshq_s32(a, b, c) simde_vqrdmlshq_s32((a), (b), (c)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QRDMLSH_H) */ -/* :: End simde/arm/neon/qrdmlsh.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qrdmlsh_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QRDMLSH_LANE_H) -#define SIMDE_ARM_NEON_QRDMLSH_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlshh_lane_s16(a, b, v, lane) vqrdmlshh_lane_s16((a), (b), (v), (lane)) -#else - #define simde_vqrdmlshh_lane_s16(a, b, v, lane) simde_vqrdmlshh_s16((a), (b), simde_vget_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlshh_lane_s16 - #define vqrdmlshh_lane_s16(a, b, v, lane) simde_vqrdmlshh_lane_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlshh_laneq_s16(a, b, v, lane) vqrdmlshh_laneq_s16((a), (b), (v), (lane)) -#else - #define simde_vqrdmlshh_laneq_s16(a, b, v, lane) simde_vqrdmlshh_s16((a), (b), simde_vgetq_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlshh_laneq_s16 - #define vqrdmlshh_laneq_s16(a, b, v, lane) simde_vqrdmlshh_laneq_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlshs_lane_s32(a, b, v, lane) vqrdmlshs_lane_s32((a), (b), (v), (lane)) -#else - #define simde_vqrdmlshs_lane_s32(a, b, v, lane) simde_vqrdmlshs_s32((a), (b), simde_vget_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlshs_lane_s32 - #define vqrdmlshs_lane_s32(a, b, v, lane) simde_vqrdmlshs_lane_s32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlshs_laneq_s32(a, b, v, lane) vqrdmlshs_laneq_s32((a), (b), (v), (lane)) -#else - #define simde_vqrdmlshs_laneq_s32(a, b, v, lane) simde_vqrdmlshs_s32((a), (b), simde_vgetq_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlshs_laneq_s32 - #define vqrdmlshs_laneq_s32(a, b, v, lane) simde_vqrdmlshs_laneq_s32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlsh_lane_s16(a, b, v, lane) vqrdmlsh_lane_s16((a), (b), (v), (lane)) -#else - #define simde_vqrdmlsh_lane_s16(a, b, v, lane) simde_vqrdmlsh_s16((a), (b), simde_vdup_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmlsh_lane_s16 - #define vqrdmlsh_lane_s16(a, b, v, lane) simde_vqrdmlsh_lane_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlsh_lane_s32(a, b, v, lane) vqrdmlsh_lane_s32((a), (b), (v), (lane)) -#else - #define simde_vqrdmlsh_lane_s32(a, b, v, lane) simde_vqrdmlsh_s32((a), (b), simde_vdup_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmlsh_lane_s32 - #define vqrdmlsh_lane_s32(a, b, v, lane) simde_vqrdmlsh_lane_s32((a), (b), (v), (lane)) -#endif - -#if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlshq_lane_s16(a, b, v, lane) vqrdmlshq_lane_s16((a), (b), (v), (lane)) -#else - #define simde_vqrdmlshq_lane_s16(a, b, v, lane) simde_vqrdmlshq_s16((a), (b), simde_vdupq_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmlshq_lane_s16 - #define vqrdmlshq_lane_s16(a, b, v, lane) simde_vqrdmlshq_lane_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlshq_lane_s32(a, b, v, lane) vqrdmlshq_lane_s32((a), (b), (v), (lane)) -#else - #define simde_vqrdmlshq_lane_s32(a, b, v, lane) simde_vqrdmlshq_s32((a), (b), simde_vdupq_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmlshq_lane_s32 - #define vqrdmlshq_lane_s32(a, b, v, lane) simde_vqrdmlshq_lane_s32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlsh_laneq_s16(a, b, v, lane) vqrdmlsh_laneq_s16((a), (b), (v), (lane)) -#else - #define simde_vqrdmlsh_laneq_s16(a, b, v, lane) simde_vqrdmlsh_s16((a), (b), simde_vdup_laneq_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlsh_laneq_s16 - #define vqrdmlsh_laneq_s16(a, b, v, lane) simde_vqrdmlsh_laneq_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlsh_laneq_s32(a, b, v, lane) vqrdmlsh_laneq_s32((a), (b), (v), (lane)) -#else - #define simde_vqrdmlsh_laneq_s32(a, b, v, lane) simde_vqrdmlsh_s32((a), (b), simde_vdup_laneq_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlsh_laneq_s32 - #define vqrdmlsh_laneq_s32(a, b, v, lane) simde_vqrdmlsh_laneq_s32((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlshq_laneq_s16(a, b, v, lane) vqrdmlshq_laneq_s16((a), (b), (v), (lane)) -#else - #define simde_vqrdmlshq_laneq_s16(a, b, v, lane) simde_vqrdmlshq_s16((a), (b), simde_vdupq_laneq_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlshq_laneq_s16 - #define vqrdmlshq_laneq_s16(a, b, v, lane) simde_vqrdmlshq_laneq_s16((a), (b), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_QRDMX) - #define simde_vqrdmlshq_laneq_s32(a, b, v, lane) vqrdmlshq_laneq_s32((a), (b), (v), (lane)) -#else - #define simde_vqrdmlshq_laneq_s32(a, b, v, lane) simde_vqrdmlshq_s32((a), (b), simde_vdupq_laneq_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmlshq_laneq_s32 - #define vqrdmlshq_laneq_s32(a, b, v, lane) simde_vqrdmlshq_laneq_s32((a), (b), (v), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QRDMLSH_LANE_H) */ -/* :: End simde/arm/neon/qrdmlsh_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qrdmulh.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, 
and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - */ - -#if !defined(SIMDE_ARM_NEON_QRDMULH_H) -#define SIMDE_ARM_NEON_QRDMULH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vqrdmulhh_s16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqrdmulhh_s16(a, b); - #else - return HEDLEY_STATIC_CAST(int16_t, (((1 << 15) + ((HEDLEY_STATIC_CAST(int32_t, (HEDLEY_STATIC_CAST(int32_t, a) * HEDLEY_STATIC_CAST(int32_t, b)))) << 1)) >> 16) & 0xffff); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmulhh_s16 - #define vqrdmulhh_s16(a, b) simde_vqrdmulhh_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vqrdmulhs_s32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqrdmulhs_s32(a, b); - #else - return HEDLEY_STATIC_CAST(int32_t, (((HEDLEY_STATIC_CAST(int64_t, 1) << 31) + ((HEDLEY_STATIC_CAST(int64_t, (HEDLEY_STATIC_CAST(int64_t, a) * HEDLEY_STATIC_CAST(int64_t, b)))) << 1)) >> 32) & 0xffffffff); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmulhs_s32 - #define vqrdmulhs_s32(a, b) simde_vqrdmulhs_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vqrdmulh_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrdmulh_s16(a, b); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrdmulhh_s16(a_.values[i], b_.values[i]); - } - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmulh_s16 - #define vqrdmulh_s16(a, b) simde_vqrdmulh_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vqrdmulh_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrdmulh_s32(a, b); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrdmulhs_s32(a_.values[i], b_.values[i]); - } - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmulh_s32 - #define vqrdmulh_s32(a, b) simde_vqrdmulh_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t 
-simde_vqrdmulhq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrdmulhq_s16(a, b); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - /* https://github.com/WebAssembly/simd/pull/365 */ - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqrdmulhq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i y = _mm_mulhrs_epi16(a_.m128i, b_.m128i); - __m128i tmp = _mm_cmpeq_epi16(y, _mm_set1_epi16(INT16_MAX)); - r_.m128i = _mm_xor_si128(y, tmp); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i prod_lo = _mm_mullo_epi16(a_.m128i, b_.m128i); - const __m128i prod_hi = _mm_mulhi_epi16(a_.m128i, b_.m128i); - const __m128i tmp = - _mm_add_epi16( - _mm_avg_epu16( - _mm_srli_epi16(prod_lo, 14), - _mm_setzero_si128() - ), - _mm_add_epi16(prod_hi, prod_hi) - ); - r_.m128i = - _mm_xor_si128( - tmp, - _mm_cmpeq_epi16(_mm_set1_epi16(INT16_MAX), tmp) - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrdmulhh_s16(a_.values[i], b_.values[i]); - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmulhq_s16 - #define vqrdmulhq_s16(a, b) simde_vqrdmulhq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqrdmulhq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrdmulhq_s32(a, b); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrdmulhs_s32(a_.values[i], b_.values[i]); - } - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmulhq_s32 - #define vqrdmulhq_s32(a, b) simde_vqrdmulhq_s32((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QRDMULH_H) */ -/* :: End simde/arm/neon/qrdmulh.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qrdmulh_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QRDMULH_LANE_H) -#define SIMDE_ARM_NEON_QRDMULH_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrdmulhh_lane_s16(a, v, lane) vqrdmulhh_lane_s16((a), (v), (lane)) -#else - #define simde_vqrdmulhh_lane_s16(a, v, lane) simde_vqrdmulhh_s16((a), simde_vget_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmulhh_lane_s16 - #define vqrdmulhh_lane_s16(a, v, lane) simde_vqrdmulhh_lane_s16((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrdmulhh_laneq_s16(a, v, lane) vqrdmulhh_laneq_s16((a), (v), (lane)) -#else - #define simde_vqrdmulhh_laneq_s16(a, v, lane) simde_vqrdmulhh_s16((a), simde_vgetq_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmulhh_laneq_s16 - #define vqrdmulhh_laneq_s16(a, v, lane) simde_vqrdmulhh_laneq_s16((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vqrdmulhs_lane_s32(a, v, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vqrdmulhs_lane_s32((a), (v), (lane))) - #else - #define simde_vqrdmulhs_lane_s32(a, v, lane) vqrdmulhs_lane_s32((a), (v), (lane)) - #endif -#else - #define simde_vqrdmulhs_lane_s32(a, v, lane) simde_vqrdmulhs_s32((a), simde_vget_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmulhs_lane_s32 - #define vqrdmulhs_lane_s32(a, v, lane) simde_vqrdmulhs_lane_s32((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) - #define simde_vqrdmulhs_laneq_s32(a, v, lane) \ - SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vqrdmulhs_laneq_s32((a), (v), (lane))) - #else - #define simde_vqrdmulhs_laneq_s32(a, v, lane) vqrdmulhs_laneq_s32((a), (v), (lane)) - #endif -#else - #define simde_vqrdmulhs_laneq_s32(a, v, lane) simde_vqrdmulhs_s32((a), simde_vgetq_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmulhs_laneq_s32 - #define vqrdmulhs_laneq_s32(a, v, lane) simde_vqrdmulhs_laneq_s32((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqrdmulh_lane_s16(a, v, lane) vqrdmulh_lane_s16((a), (v), (lane)) -#else - #define simde_vqrdmulh_lane_s16(a, v, lane) simde_vqrdmulh_s16((a), simde_vdup_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmulh_lane_s16 - #define vqrdmulh_lane_s16(a, v, lane) simde_vqrdmulh_lane_s16((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqrdmulh_lane_s32(a, v, lane) vqrdmulh_lane_s32((a), (v), (lane)) -#else - #define simde_vqrdmulh_lane_s32(a, v, lane) simde_vqrdmulh_s32((a), simde_vdup_lane_s32((v), (lane))) 
-#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmulh_lane_s32 - #define vqrdmulh_lane_s32(a, v, lane) simde_vqrdmulh_lane_s32((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqrdmulhq_lane_s16(a, v, lane) vqrdmulhq_lane_s16((a), (v), (lane)) -#else - #define simde_vqrdmulhq_lane_s16(a, v, lane) simde_vqrdmulhq_s16((a), simde_vdupq_lane_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmulhq_lane_s16 - #define vqrdmulhq_lane_s16(a, v, lane) simde_vqrdmulhq_lane_s16((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqrdmulhq_lane_s32(a, v, lane) vqrdmulhq_lane_s32((a), (v), (lane)) -#else - #define simde_vqrdmulhq_lane_s32(a, v, lane) simde_vqrdmulhq_s32((a), simde_vdupq_lane_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrdmulhq_lane_s32 - #define vqrdmulhq_lane_s32(a, v, lane) simde_vqrdmulhq_lane_s32((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrdmulh_laneq_s16(a, v, lane) vqrdmulh_laneq_s16((a), (v), (lane)) -#else - #define simde_vqrdmulh_laneq_s16(a, v, lane) simde_vqrdmulh_s16((a), simde_vdup_laneq_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmulh_laneq_s16 - #define vqrdmulh_laneq_s16(a, v, lane) simde_vqrdmulh_laneq_s16((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrdmulh_laneq_s32(a, v, lane) vqrdmulh_laneq_s32((a), (v), (lane)) -#else - #define simde_vqrdmulh_laneq_s32(a, v, lane) simde_vqrdmulh_s32((a), simde_vdup_laneq_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmulh_laneq_s32 - #define vqrdmulh_laneq_s32(a, v, lane) simde_vqrdmulh_laneq_s32((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrdmulhq_laneq_s16(a, v, lane) vqrdmulhq_laneq_s16((a), (v), (lane)) -#else - #define simde_vqrdmulhq_laneq_s16(a, v, lane) simde_vqrdmulhq_s16((a), simde_vdupq_laneq_s16((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmulhq_laneq_s16 - #define vqrdmulhq_laneq_s16(a, v, lane) simde_vqrdmulhq_laneq_s16((a), (v), (lane)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrdmulhq_laneq_s32(a, v, lane) vqrdmulhq_laneq_s32((a), (v), (lane)) -#else - #define simde_vqrdmulhq_laneq_s32(a, v, lane) simde_vqrdmulhq_s32((a), simde_vdupq_laneq_s32((v), (lane))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrdmulhq_laneq_s32 - #define vqrdmulhq_laneq_s32(a, v, lane) simde_vqrdmulhq_laneq_s32((a), (v), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QRDMULH_LANE_H) */ -/* :: End simde/arm/neon/qrdmulh_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qrdmulh_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * 
[Deleted: remainder of the vendored simde/arm/neon/qrdmulh_n.h — the portable fallbacks simde_vqrdmulh_n_s16, simde_vqrdmulh_n_s32, simde_vqrdmulhq_n_s16, and simde_vqrdmulhq_n_s32 with their SIMDE_ARM_NEON_A32V7 native-alias guards and closing include guard — followed by the opening MIT license and "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY" blocks of the also-deleted vendored headers simde/arm/neon/qrshl.h, simde/x86/avx.h, simde/x86/sse.h, and simde/x86/mmx.h.]
[Deleted: body of the vendored simde/x86/mmx.h — the simde__m64_private union and simde__m64 typedef with their alignment static asserts, the simde__m64_from_private / simde__m64_to_private helpers and SIMDE_X86_GENERATE_CONVERSION_FUNCTION mappings for NEON and Loongson MMI, and the scalar/NEON/MMI fallback implementations of the MMX intrinsics (simde_mm_add_pi8/pi16/pi32, simde_mm_adds_pi8/pu8/pi16/pu16, simde_mm_and_si64, simde_mm_andnot_si64, simde_mm_or_si64, simde_mm_cmpeq_pi8/pi16/pi32, simde_mm_cmpgt_pi8/pi16/pi32, simde_mm_cvtm64_si64, simde_mm_cvtsi32_si64, simde_mm_cvtsi64_m64, simde_mm_cvtsi64_si32, simde_mm_empty, simde_mm_madd_pi16, simde_mm_mulhi_pi16, simde_mm_mullo_pi16, simde_mm_packs_pi16/pi32/pu16, the simde_mm_set*/set1*/setr*/setzero_si64 constructors and simde_x_mm_set_* variants, the simde_x_mm_load/loadu/store/storeu_si64 helpers, the shift family simde_mm_sll*/slli*/srl*/srli*/sra*/srai*, and simde_mm_sub_pi8/pi16/pi32 with simde_mm_subs_pi8/pu8), together with their _m_* native-alias #defines. Deleted vendored SIMDe content continues below.]
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) -# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { - r_.i16[i] = SHRT_MIN; - } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) -# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - const int x = a_.u16[i] - b_.u16[i]; - if (x < 0) { - r_.u16[i] = 0; - } else if (x > UINT16_MAX) { - r_.u16[i] = UINT16_MAX; - } else { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) -# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); - #else - r_.i8[0] = a_.i8[4]; - r_.i8[1] = b_.i8[4]; - r_.i8[2] = a_.i8[5]; - r_.i8[3] = b_.i8[5]; - r_.i8[4] = a_.i8[6]; - r_.i8[5] = b_.i8[6]; - r_.i8[6] = a_.i8[7]; - r_.i8[7] = b_.i8[7]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) -# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); - #else - r_.i16[0] = a_.i16[2]; - r_.i16[1] = b_.i16[2]; - r_.i16[2] = a_.i16[3]; - r_.i16[3] = b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) -# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[1]; - r_.i32[1] = b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) -# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); - #else - r_.i8[0] = a_.i8[0]; - r_.i8[1] = b_.i8[0]; - r_.i8[2] = a_.i8[1]; - r_.i8[3] = b_.i8[1]; - r_.i8[4] = a_.i8[2]; - r_.i8[5] = b_.i8[2]; - r_.i8[6] = a_.i8[3]; - r_.i8[7] = b_.i8[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) -# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = 
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); - #else - r_.i16[0] = a_.i16[0]; - r_.i16[1] = b_.i16[0]; - r_.i16[2] = a_.i16[1]; - r_.i16[3] = b_.i16[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) -# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); - #else - r_.i32[0] = a_.i32[0]; - r_.i32[1] = b_.i32[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) -# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_xor_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - r_.u64[0] = a_.u64[0] ^ b_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) -# define _m_pxor(a, b) simde_mm_xor_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_m_to_int (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _m_to_int(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _m_to_int(a) simde_m_to_int(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_MMX_H) */ -/* :: End simde/x86/mmx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) - #define NOMINMAX - #include -#endif - -#if defined(__ARM_ACLE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) 
SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_ALIGN_TO_16 __m128 n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v16i8 lsx_i8; - v8i16 lsx_i16; - v4i32 lsx_i32; - v2i64 lsx_i64; - v16u8 lsx_u8; - v8u16 
lsx_u16; - v4u32 lsx_u32; - v2u64 lsx_u64; - v4f32 lsx_f32; - v2f64 lsx_f64; - #endif -} simde__m128_private; - -#if defined(SIMDE_X86_SSE_NATIVE) - typedef __m128 simde__m128; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef float32x4_t simde__m128; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; -#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - typedef v4f32 simde__m128; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128_private simde__m128; -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - typedef simde__m128 __m128; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde__m128_from_private(simde__m128_private v) { - simde__m128 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128_private -simde__m128_to_private(simde__m128 v) { - simde__m128_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(float) - simde__m128_to_altivec_f32(simde__m128 value) { - simde__m128_private r_ = simde__m128_to_private(value); - return r_.altivec_f32; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { - simde__m128_private r_; - r_.altivec_f32 = 
value; - return simde__m128_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); -#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ - -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) -#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ - -enum { - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, - SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, - SIMDE_MM_ROUND_UP = _MM_ROUND_UP, - SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO - #else - SIMDE_MM_ROUND_NEAREST = 0x0000, - SIMDE_MM_ROUND_DOWN = 0x2000, - SIMDE_MM_ROUND_UP = 0x4000, - SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 - #endif -}; -#if defined(_MM_ROUND_MASK) -# define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK -#else -# define SIMDE_MM_ROUND_MASK (0x6000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK -#endif - -#if defined(_MM_FROUND_TO_NEAREST_INT) -# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT -# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF -# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF -# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO -# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION - -# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC -# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC -#else -# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 -# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 -# define SIMDE_MM_FROUND_TO_POS_INF 0x02 -# define SIMDE_MM_FROUND_TO_ZERO 0x03 -# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 - -# define SIMDE_MM_FROUND_RAISE_EXC 0x00 -# define SIMDE_MM_FROUND_NO_EXC 0x08 -#endif - -#define SIMDE_MM_FROUND_NINT \ - (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_FLOOR \ - (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_CEIL \ - (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_TRUNC \ - (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_RINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_NEARBYINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) - -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) -# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT -# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF -# define _MM_FROUND_TO_POS_INF 
SIMDE_MM_FROUND_TO_POS_INF -# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO -# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION -# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC -# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT -# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR -# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL -# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC -# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT -# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT -#endif - -#if defined(_MM_EXCEPT_INVALID) -# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID -#else -# define SIMDE_MM_EXCEPT_INVALID (0x0001) -#endif -#if defined(_MM_EXCEPT_DENORM) -# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM -#else -# define SIMDE_MM_EXCEPT_DENORM (0x0002) -#endif -#if defined(_MM_EXCEPT_DIV_ZERO) -# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO -#else -# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) -#endif -#if defined(_MM_EXCEPT_OVERFLOW) -# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW -#else -# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) -#endif -#if defined(_MM_EXCEPT_UNDERFLOW) -# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW -#else -# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) -#endif -#if defined(_MM_EXCEPT_INEXACT) -# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT -#else -# define SIMDE_MM_EXCEPT_INEXACT (0x0020) -#endif -#if defined(_MM_EXCEPT_MASK) -# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK -#else -# define SIMDE_MM_EXCEPT_MASK \ - (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ - SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ - SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID - #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM - #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO - #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW - #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW - #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT - #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK -#endif - -#if defined(_MM_MASK_INVALID) -# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID -#else -# define SIMDE_MM_MASK_INVALID (0x0080) -#endif -#if defined(_MM_MASK_DENORM) -# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM -#else -# define SIMDE_MM_MASK_DENORM (0x0100) -#endif -#if defined(_MM_MASK_DIV_ZERO) -# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO -#else -# define SIMDE_MM_MASK_DIV_ZERO (0x0200) -#endif -#if defined(_MM_MASK_OVERFLOW) -# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW -#else -# define SIMDE_MM_MASK_OVERFLOW (0x0400) -#endif -#if defined(_MM_MASK_UNDERFLOW) -# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW -#else -# define SIMDE_MM_MASK_UNDERFLOW (0x0800) -#endif -#if defined(_MM_MASK_INEXACT) -# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT -#else -# define SIMDE_MM_MASK_INEXACT (0x1000) -#endif -#if defined(_MM_MASK_MASK) -# define SIMDE_MM_MASK_MASK _MM_MASK_MASK -#else -# define SIMDE_MM_MASK_MASK \ - (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ - SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ - SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID - #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM - #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO - #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW - #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW - #define _MM_MASK_INEXACT 
SIMDE_MM_MASK_INEXACT - #define _MM_MASK_MASK SIMDE_MM_MASK_MASK -#endif - -#if defined(_MM_FLUSH_ZERO_MASK) -# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK -#else -# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_ON) -# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON -#else -# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_OFF) -# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF -#else -# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK - #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON - #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_ROUNDING_MODE(void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _MM_GET_ROUNDING_MODE(); - #elif defined(SIMDE_HAVE_FENV_H) - unsigned int vfe_mode; - - switch (fegetround()) { - #if defined(FE_TONEAREST) - case FE_TONEAREST: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case FE_TOWARDZERO: - vfe_mode = SIMDE_MM_ROUND_DOWN; - break; - #endif - - #if defined(FE_UPWARD) - case FE_UPWARD: - vfe_mode = SIMDE_MM_ROUND_UP; - break; - #endif - - #if defined(FE_DOWNWARD) - case FE_DOWNWARD: - vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; - break; - #endif - - default: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - } - - return vfe_mode; - #else - return SIMDE_MM_ROUND_NEAREST; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_ROUNDING_MODE(a); - #elif defined(SIMDE_HAVE_FENV_H) - int fe_mode = FE_TONEAREST; - - switch (a) { - #if defined(FE_TONEAREST) - case SIMDE_MM_ROUND_NEAREST: - fe_mode = FE_TONEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case SIMDE_MM_ROUND_TOWARD_ZERO: - fe_mode = FE_TOWARDZERO; - break; - #endif - - #if defined(FE_DOWNWARD) - case SIMDE_MM_ROUND_DOWN: - fe_mode = FE_DOWNWARD; - break; - #endif - - #if defined(FE_UPWARD) - case SIMDE_MM_ROUND_UP: - fe_mode = FE_UPWARD; - break; - #endif - - default: - return; - } - - fesetround(fe_mode); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - 
SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. */ - #if \ - defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_f32 = vrndiq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndnq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndmq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); - #elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); - #elif 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndpq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); - #elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); - #elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) -#else - #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps(e3, e2, e1, e0); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; - r_.neon_f32 = vld1q_f32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps1 (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps1(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - (void) a; - return vec_splats(a); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - return (simde__m128)__lsx_vldrepl_w(&a, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_splat(a); - #else - return simde_mm_set_ps(a, a, a, a); - #endif -} -#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps1(a) simde_mm_set_ps1(a) -# define _mm_set1_ps(a) simde_mm_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_move_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_move_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = 
vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; - r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); - #else - r_.f32[0] = b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_broadcastlow_ps(simde__m128 a) { - /* This function broadcasts the first element in the inpu vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_ss functions since there may be garbage in the upper lanes. */ - - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_shuffle_ps(a, a, 0); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = 
simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - // the upper values in the result must be the remnants of . - r_.neon_f32 = vaddq_f32(a_.neon_f32, value); - #else - r_.f32[0] = a_.f32[0] + b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_and_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_and_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_andnot_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_xor_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_xor_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for 
(size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_or_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_or_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_not_ps(simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* Note: we use ints instead of floats because we don't want cmpeq - * to return false for (NaN, NaN) */ - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
[Elided vendored third-party code: this portion of the diff removes the SIMDE x86 SSE compatibility header that the package previously bundled. The deleted lines are SIMDE's portable fallbacks for the SSE/MMX intrinsics _mm_avg_pu16/_mm_avg_pu8, the _mm_cmp*_ps/_mm_cmp*_ss and _mm_comi*_ss comparisons (including the ordered/unordered NaN-aware variants), the _mm_cvt* float/integer conversions, _mm_div_ps/_mm_div_ss, _mm_extract_pi16/_mm_insert_pi16, the _mm_load_ps/_mm_load1_ps/_mm_load_ss/_mm_loadh_pi/_mm_loadl_pi/_mm_loadr_ps/_mm_loadu_ps loads, _mm_maskmove_si64, the _mm_max_*/_mm_min_* family, and _mm_movehl_ps/_mm_movelh_ps, each implemented with NEON, WASM SIMD128, POWER AltiVec/z vector, and LoongArch LSX back-ends plus scalar fallback loops. Only this vendored header is touched in these lines; it is deleted wholesale.]
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] == b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] == b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] >= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] >= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] > b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] > b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] <= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] <= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] < b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] < b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] != b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] != b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) -# if defined(__has_builtin) -# if __has_builtin(__builtin_ia32_undef128) -# define SIMDE_HAVE_UNDEFINED128 -# endif -# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) -# define SIMDE_HAVE_UNDEFINED128 -# endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpackhi_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_high_f32(a_.neon_f32); - float32x2_t b1 = vget_high_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = 
__lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpacklo_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_low_f32(a_.neon_f32); - float32x2_t b1 = vget_low_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) || \ - defined(SIMDE_VECTOR_SUBSCRIPT)) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private a_ = simde__m64_to_private(a); - vst1_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), a_.neon_i64); - #else - simde__m64_private* - dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), - a_ = simde__m64_to_private(a); - - dest->i64[0] = a_.i64[0]; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || defined(SIMDE_LOONGARCH_LSX_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_ASSUME_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_ps(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_ps(mem_addr, a) 
simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ - row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - } while (0) -#else - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ - SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ - row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ - row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ - row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ - row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ - } while (0) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE_H) */ -/* :: End simde/x86/sse.h :: */ -#if !defined(SIMDE_X86_AVX_H) -#define SIMDE_X86_AVX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_SSE4_2_H) -#define SIMDE_X86_SSE4_2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#if !defined(SIMDE_X86_SSE4_1_H) -#define SIMDE_X86_SSE4_1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/ssse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSSE3_H) -#define SIMDE_X86_SSSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSE3_H) -#define SIMDE_X86_SSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. 
Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - * 2017 Hasindu Gamaarachchi - * 2018 Jeff Daily - */ - -#if !defined(SIMDE_X86_SSE2_H) -#define SIMDE_X86_SSE2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128i n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - #if defined(__ARM_FP16_FORMAT_IEEE) - SIMDE_ALIGN_TO_16 float16x8_t neon_f16; - #endif - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - 
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128i_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128d n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 
msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128d_private; - -#if defined(SIMDE_X86_SSE2_NATIVE) - typedef __m128i simde__m128i; - typedef __m128d simde__m128d; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int64x2_t simde__m128i; -# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - typedef float64x2_t simde__m128d; -# elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -# else - typedef simde__m128d_private simde__m128d; -# endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128i; - typedef v128_t simde__m128d; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; - #else - typedef simde__m128d_private simde__m128d; - #endif -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128i_private simde__m128i; - typedef simde__m128d_private simde__m128d; -#endif - -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - typedef simde__m128i __m128i; - typedef simde__m128d __m128d; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde__m128i_from_private(simde__m128i_private v) { - simde__m128i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i_private -simde__m128i_to_private(simde__m128i v) { - simde__m128i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde__m128d_from_private(simde__m128d_private v) { - simde__m128d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d_private -simde__m128d_to_private(simde__m128d v) { - simde__m128d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, 
f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(double) - simde__m128d_to_altivec_f64(simde__m128d value) { - simde__m128d_private r_ = simde__m128d_to_private(value); - return r_.altivec_f64; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { - simde__m128d_private r_; - r_.altivec_f64 = value; - return simde__m128d_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) - #endif - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_pd(e1, e0); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make(e0, e1); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; - r_.neon_f64 = vld1q_f64(data); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_pd(a); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_pd(a) simde_mm_set1_pd(a) - #define _mm_set_pd1(a) simde_mm_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_abs_pd(simde__m128d a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - simde_float64 mask_; - uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); - simde_memcpy(&mask_, &u64_, sizeof(u64_)); - return _mm_and_pd(_mm_set1_pd(mask_), a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vabsq_f64(a_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_abs(a_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_not_pd(simde__m128d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castpd_si128(a); - return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - mask_ = simde__m128d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 + b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] + b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_add_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] + b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_move_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(HEDLEY_IBM_VERSION) - r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); - #else - r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); - #else - r_.f64[0] = b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_broadcastlow_pd(simde__m128d a) { - /* This function broadcasts the first element in the input vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_sd functions since there may be garbage in the upper lanes. 
*/ - - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[0]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_add_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] + b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] + b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_and_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_and_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_and_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_and_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_andnot_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = ~a_.u64[i] & b_.u64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_andnot_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_xor_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - 
#endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) - uint16_t wa SIMDE_VECTOR(32); - uint16_t wb SIMDE_VECTOR(32); - uint16_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) - uint32_t wa SIMDE_VECTOR(32); - uint32_t wb SIMDE_VECTOR(32); - uint32_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setzero_si128 (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_si128(); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vdupq_n_s32(0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT) - r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = 0; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} 
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_si128() (simde_mm_setzero_si128()) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_bslli_si128 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & ~15))) { - return simde_mm_setzero_si128(); - } - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) - r_.altivec_i8 = - #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - vec_slo - #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ - vec_sro - #endif - (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); - #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.u128[0] = a_.u128[0] << (imm8 * 8); - #else - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i - imm8]; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) - #define simde_mm_bslli_si128(a, imm8) \ - simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ - simde__m128i_from_wasm_v128( \ - wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ - simde__m128i_to_wasm_v128((a)), \ - ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_; \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.i8 = \ - SIMDE_SHUFFLE_VECTOR_(8, 16, \ - simde_tmp_z_.i8, \ - (simde_tmp_a_).i8, \ - HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#endif -#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) - #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_bsrli_si128 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & ~15))) { - return simde_mm_setzero_si128(); - } - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) - r_.altivec_i8 = - #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - vec_sro - #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ - vec_slo - #endif - (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int e = HEDLEY_STATIC_CAST(int, i) + imm8; - r_.i8[i] = (e < 16) ? a_.i8[e] : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) - #define simde_mm_bsrli_si128(a, imm8) \ - simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.wasm_v128 = \ - wasm_i8x16_shuffle( \ - simde_tmp_z_.wasm_v128, \ - simde_tmp_a_.wasm_v128, \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.i8 = \ - SIMDE_SHUFFLE_VECTOR_(8, 16, \ - simde_tmp_z_.i8, \ - (simde_tmp_a_).i8, \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#endif -#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) - #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_clflush (void const* p) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_clflush(p); - #else - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_clflush(p) simde_mm_clflush(p) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] == b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] >= b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] > b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] <= b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] < b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), 
- b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] != b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { - simde__m128d_private - r_, - dest_ = simde__m128d_to_private(dest), - src_ = simde__m128d_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); - #else - simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); - uint64_t u64_nz; - simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); - uint64x2_t sign_pos = vdupq_n_u64(u64_nz); - #endif - r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); - #else - r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); - #endif - #elif defined(simde_math_copysign) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); - } - #else - simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); - return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { - return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_castpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_ps(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f32_f64(a); - #else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castpd_si128 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_si128(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_s64_f64(a); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_castps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_pd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_f32(a); - #else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castps_pd(a) simde_mm_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castps_si128 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_si128(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif 
-} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castps_si128(a) simde_mm_castps_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_castsi128_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_pd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_s64(a); - #else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_castsi128_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_ps(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); - #else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = (a_.i16 == b_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_MIPS_MSA_NATIVE) - r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpgt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpge_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpge_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpngt_pd(a, b); - #else - return simde_mm_cmple_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpngt_sd(a, b); - #else - return simde_mm_cmple_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnge_pd(a, b); - #else - return simde_mm_cmplt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpnge_sd(a, b); - #else - return simde_mm_cmplt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_pd(a, b); - #else - return simde_mm_cmpge_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_sd(a, b); - #else - return simde_mm_cmpge_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnle_pd(a, b); - #else - return simde_mm_cmpgt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return 
_mm_cmpnle_sd(a, b); - #else - return simde_mm_cmpgt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vandq_u64(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm_cvtsd_f64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cvtsd_f64(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); - #else - return a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_pd(a); - #else - simde__m128d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtepi32_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_ps(a); - #else - simde__m128_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #pragma clang diagnostic ignored "-Wc11-extensions" - #endif - r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = simde_math_round(a_.f64[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) - return _mm_cvtpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvtpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) - float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; - r_.f32 = - __builtin_shufflevector( - __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, - 0, 1, 2, 3 - ); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); - r_.f32[2] = SIMDE_FLOAT32_C(0.0); - r_.f32[3] = SIMDE_FLOAT32_C(0.0); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtpi32_pd (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_pd(a); - #else - simde__m128d_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) - a_ = simde__m128_to_private(a); - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - #else - a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_pd(a); - #else - simde__m128d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 v = simde_math_round(a_.f64[0]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsd_si64x(a); - #else - return _mm_cvtsd_si64(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); - #endif -} -#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) - #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); - - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i]; - } - #endif - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_x_mm_cvtsi128_si16 (simde__m128i a) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s16(a_.neon_i16, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i16, 0); - #else - return a_.i16[0]; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi128_si32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi128_si32(a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s32(a_.neon_i32, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i32, 0); - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsi128_si64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsi128_si64x(a); - #else - return _mm_cvtsi128_si64(a); - #endif - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); - #endif - return a_.i64[0]; - #endif -} -#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) - #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_sd(a, b); - #else - simde__m128d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.i64[1] = a_.i64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cvtsi16_si128 (int16_t a) { - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); - #else - r_.i16[0] = a; - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - r_.i16[4] = 0; - r_.i16[5] = 0; - r_.i16[6] = 0; - r_.i16[7] = 0; - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi32_si128 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_si128(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_sd(a, b); - #else - return _mm_cvtsi64x_sd(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) - #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi64_si128 (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_si128(a); - #else - return _mm_cvtsi64x_si128(a); - #endif - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i64x2_make(a, 0); - #else - r_.i64[0] = a; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) - #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtss_sd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); - return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); - - return simde__m128d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvttpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvttpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = a_.f64[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvttpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - /* Values below INT32_MIN saturate anyways, so we don't need to - * test for that. 
*/ - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = - vandq_u32( - vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), - vceqq_f32(a_.neon_f32, a_.neon_f32) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); - #endif - - r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - v128_t valid_input = - wasm_v128_and( - wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), - wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); - #endif - - r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); - #endif - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; - - __typeof__(r_.i32) valid_input = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.i32), - (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) - ); - #elif !defined(SIMDE_FAST_NANS) - __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); - #endif - - __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; - r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); - #endif - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvttsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
[... remainder of the deletion hunk for the vendored SIMDE SSE2 compatibility header, removed verbatim. The collapsed lines above and below cover the portable fallback implementations (x86 SSE2, ARM NEON, WASM SIMD128, POWER AltiVec, and scalar paths) and their native-alias macros for: _mm_cvttsd_si32/_mm_cvttsd_si64, _mm_div_pd/_mm_div_sd, _mm_extract_epi16/_mm_insert_epi16, the _mm_load*/_mm_load1_pd/_mm_loadh_pd/_mm_loadl_* /_mm_loadr_pd/_mm_loadu_* family (including _mm_loadu_epi8/16/32/64 and _mm_loadu_si16/si32/si64/si128), _mm_madd_epi16, _mm_maskmoveu_si128, _mm_movemask_epi8/_mm_movemask_pd, _mm_movepi64_pi64/_mm_movpi64_epi64/_mm_move_epi64, the _mm_min_*/_mm_max_* family (epi16, epu8, pd, sd), _mm_mul_epu32/_mm_mul_pd/_mm_mul_sd/_mm_mul_su32, simde_x_mm_mul_epi64/simde_x_mm_mod_epi64, _mm_mulhi_epi16/_mm_mulhi_epu16/_mm_mullo_epi16, _mm_or_pd/_mm_or_si128, _mm_packs_epi16/_mm_packs_epi32/_mm_packus_epi16, _mm_pause, _mm_sad_epu8, and the _mm_set_epi*/_mm_set_sd/_mm_set1_*/simde_x_mm_set_epu*/_mm_setr_* constructors. ...]
#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi32(e3, e2, e1, e0); - #else - return simde_mm_set_epi32(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_epi64(e1, e0); - #else - return simde_mm_set_epi64(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_pd(e1, e0); - #else - return simde_mm_set_pd(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setzero_pd (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_pd(); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); - #else - return simde_mm_castsi128_pd(simde_mm_setzero_si128()); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_pd() simde_mm_setzero_pd() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_undefined_pd (void) { - simde__m128d_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_pd(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_pd() simde_mm_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_undefined_si128 (void) { - simde__m128i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_si128(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_si128() (simde_mm_undefined_si128()) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_setone_pd (void) { - return simde_mm_castps_pd(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_setone_si128 (void) { - return simde_mm_castps_si128(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = 
simde__m128i_to_private(a); \ - simde__m128i_from_wasm_v128( \ - wasm_i32x4_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3)); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_epi32(a, imm8) \ - (__extension__ ({ \ - const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ - int32x4_t simde_mm_shuffle_epi32_r_; \ - simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ - vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - (simde_tmp_a_).i32, \ - (simde_tmp_a_).i32, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; - r_.f64[1] = ((imm8 & 2) == 0) ? 
b_.f64[0] : b_.f64[1]; - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ - simde__m128d_from_private((simde__m128d_private) { .f64 = \ - SIMDE_SHUFFLE_VECTOR_(64, 16, \ - simde__m128d_to_private(a).f64, \ - simde__m128d_to_private(b).f64, \ - (((imm8) ) & 1), \ - (((imm8) >> 1) & 1) + 2) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[i]; - } - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflehi_epi16(a, imm8) \ - (__extension__ ({ \ - int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ - simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ - wasm_i16x8_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = 
simde__m128i_to_private(a); - - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; - } - SIMDE_VECTORIZE - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) \ - simde__m128i_from_wasm_v128( \ - wasm_i16x8_shuffle( \ - simde__m128i_to_wasm_v128((a)), \ - wasm_i16x8_splat(0), \ - (((imm8) & 0x03) ), \ - (((imm8) & 0x0c) >> 2), \ - (((imm8) & 0x30) >> 4), \ - (((imm8) & 0xc0) >> 6), \ - 4, 5, 6, 7)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflelo_epi16(a, imm8) \ - (__extension__({ \ - int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ - simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), \ - 4, 5, 6, 7) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 15) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = (a_.u16 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? 
wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 31) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = (a_.u32 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 63) - return simde_mm_setzero_si128(); - - const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] << s; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsqrtq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_sqrt(a_.altivec_f64); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_sqrt) - r_.f64[0] = simde_math_sqrt(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~15) ? 15 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~31) ? 
31 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sra_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) - return _mm_sra_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - (((imm8) <= 0) ? 
\ - (a) : \ - simde__m128i_from_neon_i16( \ - ((imm8) > 15) ? \ - vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ - vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i32( \ - ((imm8) > 31) ? \ - vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ - vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sl(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 63))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i64( \ - ((imm8) > 63) ? \ - vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ - vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u16( \ - ((imm8) > 15) ? \ - vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ - vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u32( \ - ((imm8) > 31) ? \ - vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ - vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sr(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); - #else - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u64( \ - ((imm8) > 63) ? \ - vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ - vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store1_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); - #else - mem_addr[0] = a_.f64[0]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) - #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_sd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); - simde_memcpy(mem_addr, &v, sizeof(v)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); - simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde_float64 v = a_.f64[0]; - simde_memcpy(mem_addr, &v, sizeof(simde_float64)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void - simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeh_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - *mem_addr = a_.f64[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int64_t tmp; - - /* memcpy to prevent aliasing, tmp because we can't take the - * address of a vector element. */ - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - tmp = vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - tmp = vec_extract(a_.altivec_i64, 0); - #else - tmp = a_.i64[0]; - #endif - - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_pd(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 tmp; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - tmp = vgetq_lane_f64(a_.neon_f64, 0); - #else - tmp = a_.f64[0]; - #endif - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storer_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #else - mem_addr[0] = a_.f64[1]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si16(mem_addr, a); - #else - int16_t val = simde_x_mm_cvtsi128_si16(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si32(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); - #else - int32_t val = simde_mm_cvtsi128_si32(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si64(mem_addr, a); - #else - int64_t val = simde_mm_cvtsi128_si64(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_pd(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_si128(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-void -simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_si32(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_s32(mem_addr, vdupq_n_s32(a), 0); - #else - *mem_addr = a; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) - _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s64(mem_addr, vdup_n_s64(a)); - #else - *mem_addr = a; - #endif -} -#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) - #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] - b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m64 -simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] - b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] == b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] == b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] >= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] >= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > 
wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] > b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] > b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] <= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] <= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] < b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] < b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] != b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] != b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_lfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_lfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_lfence() simde_mm_lfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_mfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_mfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mfence() simde_mm_mfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_high_s16(a_.neon_i16); - int16x4_t b1 = vget_high_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi16(a, b) 
simde_mm_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_high_s32(a_.neon_i32); - int32x2_t b1 = vget_high_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_h = vget_high_s64(a_.neon_i64); - int64x1_t b_h = vget_high_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_h, b_h); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi8 (simde__m128i a, 
simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i]; - r_.i8[(i * 2) + 1] = b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_low_s16(a_.neon_i16); - int16x4_t b1 = vget_low_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i]; - r_.i16[(i * 2) + 1] = b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_low_s32(a_.neon_i32); - int32x2_t b1 = vget_low_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i]; - r_.i32[(i * 2) + 1] = b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_l = vget_low_s64(a_.neon_i64); - int64x1_t b_l = vget_low_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_l, b_l); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i]; - r_.i64[(i * 2) + 1] = b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i]; - r_.f64[(i * 2) + 1] = b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_negate_pd(simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - r_.altivec_f64 = vec_neg(a_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vnegq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); - #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_not_si128 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_ternarylogic_epi32(a, a, a, 0x55); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/sse2.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i]; - r_.i16[i + halfway_point] = b_.i16[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i + 1]; - r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi32 
(simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i]; - r_.i32[i + halfway_point] = b_.i32[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i + 1]; - r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i]; - r_.f32[i + halfway_point] = b_.f32[2 * i]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i + 1]; - r_.f32[i + halfway_point] = 
b_.f32[2 * i + 1]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i]; - r_.f64[i + halfway_point] = b_.f64[2 * i]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i + 1]; - r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); - float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); - return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); - #else - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; - r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); - float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); - return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); - #else - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; - r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; - } - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_ps(a, b) simde_mm_addsub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_pd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); - #else - return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); - #else - return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_pd(a, b); - #else - return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); - #else - return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_lddqu_si128(mem_addr); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loaddup_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_loaddup_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(*mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.f64[1] = *mem_addr; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_movedup_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movedup_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - r_.f64[0] = a_.f64[0]; - r_.f64[1] = a_.f64[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehdup_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movehdup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); - #else - r_.f32[0] = a_.f32[1]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_moveldup_ps (simde__m128 a) { - #if defined(SIMDE__SSE3_NATIVE) - return _mm_moveldup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[0]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[2]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE3_H) */ -/* :: End simde/x86/sse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi8(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabsq_s8(a_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_abs(a_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / 
sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vabsq_s16(a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_abs(a_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); - return _mm_sub_epi32(_mm_xor_si128(a, m), m); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vabsq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_abs(a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_PUSH - #pragma warning(disable:4146) - #endif - r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_POP - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi8(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabs_s8(a_.neon_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? 
(- a_.i8[i]) : a_.i8[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi16 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi16(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vabs_s16(a_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi32 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi32(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vabs_s32(a_.neon_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? (- a_.i32[i]) : a_.i32[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - if (HEDLEY_UNLIKELY(count > 31)) - return simde_mm_setzero_si128(); - - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 31) { - r_.i8[i] = 0; - } else if (srcpos > 15) { - r_.i8[i] = a_.i8[(srcpos) & 15]; - } else { - r_.i8[i] = b_.i8[srcpos]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSSE3_NATIVE) - #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_alignr_epi8(a, b, count) \ - ( \ - ((count) > 31) \ - ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ - : ( \ - ((count) > 15) \ - ? 
(simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ - : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) -#endif -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) - #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) - SIMDE_REQUIRE_CONSTANT(count) { - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 15) { - r_.i8[i] = 0; - } else if (srcpos > 7) { - r_.i8[i] = a_.i8[(srcpos) & 7]; - } else { - r_.i8[i] = b_.i8[srcpos]; - } - } - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) -# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_alignr_pi8(a, b, count) \ - ( \ - ((count) > 15) \ - ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ - : ( \ - ((count) > 7) \ - ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ - : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) -#endif -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_shuffle_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Mask out the bits we're not interested in. vtbl will result in 0 - * for any values outside of [0, 15], so if the high bit is set it - * will return 0, just like in SSSE3. 
[... remainder of the deleted vendored SIMDe emulation headers (simde/x86 SSSE3 and SSE4.1 shims: shuffle/hadd/hsub/maddubs/mulhrs/sign, blend, round/ceil/floor, sign- and zero-extension converts, dp, extract) omitted for brevity ...]
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ss - #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - /* clang-3.8 returns an incompatible type, so we need the cast. MSVC - * can't handle the cast ("error C2440: 'type cast': cannot convert - * from '__m128i' to '__m128i'"). */ - #if defined(__clang__) - #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) - #else - #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi8 - #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(__clang__) - #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) - #else - #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi32 - #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - #if defined(SIMDE_BUG_GCC_94482) - simde__m128i_private - a_ = simde__m128i_to_private(a); - - switch(imm8) { - case 0: - return simde_mm_set_epi64x(a_.i64[1], i); - break; - case 1: - return simde_mm_set_epi64x(i, a_.i64[0]); - break; - default: - HEDLEY_UNREACHABLE(); - break; - } - #else - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i64[imm8] = i; - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_insert_epi64 - #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - float tmp1_ = b_.f32[(imm8 >> 6) & 3]; - a_.f32[(imm8 >> 4) & 3] = tmp1_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_ps - #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi8(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi8 - #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi32(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi32 - #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_add_epi16(b, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu16 - #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu32 - #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi8 - #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi32 - #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu16 - #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu32 - #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_minpos_epu16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_minpos_epu16(a); - #else - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()), - a_ = simde__m128i_to_private(a); - - r_.u16[0] = UINT16_MAX; - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - if (a_.u16[i] < r_.u16[0]) { - r_.u16[0] = a_.u16[i]; - r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); - } - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_minpos_epu16 - #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - const int a_offset = imm8 & 4; - const int b_offset = (imm8 & 3) << 2; - -#if defined(simde_math_abs) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { - r_.u16[i] = - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) -# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mpsadbw_epu8 - #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mul_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // vmull_s32 upcasts instead of masking, so we downcast. 
- int32x2_t a_lo = vmovn_s64(a_.neon_i64); - int32x2_t b_lo = vmovn_s64(b_.neon_i64); - r_.neon_i64 = vmull_s32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make( - wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), - wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = - HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * - HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mul_epi32 - #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mullo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mullo_epi32 - #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 * b_.u32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] * b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_packus_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i max = _mm_set1_epi32(UINT16_MAX); - const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); - const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); - return - _mm_packs_epi32( - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) - ); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); - #else - r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = - 
vcombine_u16( - vqmovun_s32(a_.neon_i32), - vqmovun_s32(b_.neon_i32) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - - v &= ~(v >> 31); - v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_packus_epi32 - #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyint) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f64[0] = simde_math_nearbyint(b_.f64[0]); - break; - #endif - - #if defined(simde_math_floor) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f64[0] = simde_math_floor(b_.f64[0]); - break; - #endif - - #if defined(simde_math_ceil) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f64[0] = simde_math_ceil(b_.f64[0]); - break; - #endif - - #if defined(simde_math_trunc) - case SIMDE_MM_FROUND_TO_ZERO: - r_.f64[0] = simde_math_trunc(b_.f64[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) -# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_sd - #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128_private - r_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyintf) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_floorf) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f32[0] = simde_math_floorf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_ceilf) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f32[0] = simde_math_ceilf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_truncf) - case SIMDE_MM_FROUND_TO_ZERO: - 
r_.f32[0] = simde_math_truncf(b_.f32[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_ss - #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_load) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - return __builtin_nontemporal_load(mem_addr); - #else - return simde_mm_load_si128(mem_addr); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_stream_load_si128 - #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_ones (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_ones(a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; - #else - int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(&:r_) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r_ &= a_.i32f[i]; - } - - r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_ones - #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_zeros(a, mask); - #else - simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; - #else - int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(|:r_) - for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { - r_ |= tmp_.i32f[i]; - } - - r = !r_; - #endif - - return r; - #endif -} 
-#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_zeros - #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_mix_ones_zeros(a, mask); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); - int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); - return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); - long long c0 = wasm_i64x2_extract_lane(m, 0); - long long c1 = wasm_i64x2_extract_lane(m, 1); - long long ones = c0 | c1; - long long zeros = ~(c0 & c1); - return ones && zeros; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) - if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) - return 1; - - return 0; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_mix_ones_zeros - #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #else - int_fast32_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= ~a_.i32f[i] & b_.i32f[i]; - } - - return HEDLEY_STATIC_CAST(int, !r); - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_si128 - #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testnzc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); - int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !( !(vgetq_lane_s64(s641, 0) || vgetq_lane_s64(s641, 1)) \ - || !(vgetq_lane_s64(s640, 0) || vgetq_lane_s64(s640, 1)) ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ - && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) - return 1; - } - - return 0; - #endif - #endif -} -#if 
defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_si128 - #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testz_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #elif defined(SIMDE_HAVE_INT128_) - if ((a_.u128[0] & b_.u128[0]) == 0) { - return 1; - } - return 0; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if ((a_.u64[i] & b_.u64[i]) > 0) - return 0; - } - #endif - - return 1; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_si128 - #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_1_H) */ -/* :: End simde/x86/sse4.1.h :: */ - -#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS - #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS - #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS - #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS - #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY - #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES - #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH - #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED - #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY - #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY - #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT - #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT - #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK - #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK -#else - #define SIMDE_SIDD_UBYTE_OPS 0x00 - #define SIMDE_SIDD_UWORD_OPS 0x01 - #define SIMDE_SIDD_SBYTE_OPS 0x02 - #define SIMDE_SIDD_SWORD_OPS 0x03 - #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 - #define SIMDE_SIDD_CMP_RANGES 0x04 - #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 - #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c - #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 - #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 - #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 - #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 - #define SIMDE_SIDD_BIT_MASK 0x00 - #define SIMDE_SIDD_UNIT_MASK 0x40 -#endif - -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) - #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS - #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS - #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS - #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS - #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY - #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES - #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH - #define 
_SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED - #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY - #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY - #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY - #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY - #define _SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT - #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT - #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK - #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ - _mm_cmpestrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrs - #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 
16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ - _mm_cmpestrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrz - #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_cmpgt_epi64(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://stackoverflow.com/a/65175746/501126 */ - __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); - r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); - return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://stackoverflow.com/a/65223269/501126 */ - r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpgt_epi64 - #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_8_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 8) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i8[i]) - a_invalid = 1; - } - return a_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_16_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 16) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i16[i]) - a_invalid = 1; - } - return a_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrs(a, b, imm8) \ - _mm_cmpistrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrs(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? 
simde_mm_cmpistrs_16_((a)) \ - : simde_mm_cmpistrs_8_((a))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrs - #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_8_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 8) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i8[i]) - b_invalid = 1; - } - return b_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_16_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 16) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i16[i]) - b_invalid = 1; - } - return b_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrz(a, b, imm8) \ - _mm_cmpistrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrz(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? simde_mm_cmpistrz_16_((b)) \ - : simde_mm_cmpistrz_8_((b))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrz - #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u8(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cb(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc ^= v; - for(int bit = 0 ; bit < 8 ; bit++) { - if (crc & 1) - crc = (crc >> 1) ^ UINT32_C(0x82f63b78); - else - crc = (crc >> 1); - } - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u16(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32ch(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u8(crc, v & 0xff); - crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u32(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cw(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u16(crc, v & 0xffff); - crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) - return _mm_crc32_u64(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return 
__crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); - #else - uint64_t crc = prevcrc; - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_2_H) */ -/* :: End simde/x86/sse4.2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; - SIMDE_ALIGN_TO_32 simde__m128 m128[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256 n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} 
[vendored SIMDE deletion, continued: this stretch of simde/x86/avx.h — the simde__m256 / simde__m256d / simde__m256i private unions and typedefs, the 32-byte alignment static asserts, the SIMDE_CMP_* predicate constants, and the polyfill implementations of the cast, setzero/setone, set/set1, set_m128*, deinterleave, add/hadd/addsub, and/andnot, blend/blendv, broadcast, 128<->256 cast, round/ceil, and _mm_cmp_ps/_mm_cmp_pd/_mm_cmp_sd intrinsics — is deleted verbatim along with the rest of the vendored header.]
~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - a_.i64[0] = INT64_C(0); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - a_.i64[0] = ~INT64_C(0); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m128d_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_sd - #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - a_.i32[0] = INT32_C(0); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - a_.i32[0] = ~INT32_C(0); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m128_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_ss - #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m256d -#if defined(__clang__) && defined(__AVX512DQ__) -simde_mm256_cmp_pd_internal_ -#else -simde_mm256_cmp_pd -#endif -(simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m256d_from_private(r_); -} -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ - simde__m256d simde_mm256_cmp_pd_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ - break; \ - default: \ - simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ - break; \ - } \ - simde_mm256_cmp_pd_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_pd - #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m256 -#if defined(__clang__) && defined(__AVX512DQ__) -simde_mm256_cmp_ps_internal_ -#else -simde_mm256_cmp_ps -#endif -(simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m256_to_private(simde_mm256_setzero_ps()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m256_from_private(r_); -} -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ - simde__m256 simde_mm256_cmp_ps_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ - break; \ - default: \ - simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ - break; \ - } \ - simde_mm256_cmp_ps_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256_private \ - simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ - simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ - simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ - \ - for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ - simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ - } \ - \ - simde__m256_from_private(simde_mm256_cmp_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_ps - #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { - simde__m256_private - r_, - dest_ = simde__m256_to_private(dest), - src_ = simde__m256_to_private(src); - - #if defined(simde_math_copysignf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #else - simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); - return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { - simde__m256d_private - r_, - dest_ = simde__m256d_to_private(dest), - src_ = simde__m256d_to_private(src); - - #if defined(simde_math_copysign) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); - } - #else - simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); - return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); - #endif - - return simde__m256d_from_private(r_); -} - -HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m256d -simde_mm256_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtepi32_pd(a); - #else - simde__m256d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi32_pd - #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 - simde_mm256_cvtepi32_ps (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtepi32_ps(a); - #else - simde__m256_private r_; - simde__m256i_private a_ = simde__m256i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi32_ps - #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_cvtpd_epi32 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtpd_epi32(a); - #else - simde__m128i_private r_; - simde__m256d_private a_ = simde__m256d_to_private(a); - - #if defined(simde_math_nearbyint) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtpd_epi32 - #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_cvtpd_ps (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m256d_private a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtpd_ps - #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtps_epi32 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtps_epi32(a); - #else - simde__m256i_private r_; - simde__m256_private a_ = simde__m256_to_private(a); - - #if defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtps_epi32 - #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtps_pd(a); - #else - simde__m256d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); - } - - return 
simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtps_pd - #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm256_cvtsd_f64 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MSVC_VERSION_CHECK(19,14,0)) - return _mm256_cvtsd_f64(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - return a_.f64[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtsd_f64 - #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm256_cvtsi256_si32 (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MSVC_VERSION_CHECK(19,14,0)) - return _mm256_cvtsi256_si32(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i32[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtsi256_si32 - #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm256_cvtss_f32 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MSVC_VERSION_CHECK(19,14,0)) - return _mm256_cvtss_f32(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - return a_.f32[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtss_f32 - #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_cvttpd_epi32 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvttpd_epi32(a); - #else - simde__m128i_private r_; - simde__m256d_private a_ = simde__m256d_to_private(a); - - #if defined(simde_math_trunc) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvttpd_epi32 - #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvttps_epi32 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvttps_epi32(a); - #else - simde__m256i_private r_; - simde__m256_private a_ = simde__m256_to_private(a); - - #if defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvttps_epi32 - #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_div_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_ps - #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_div_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_pd - #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm256_extractf128_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256d_private a_ = simde__m256d_to_private(a); - return a_.m128d[imm8]; -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf128_pd - #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_extractf128_ps (simde__m256 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256_private a_ = simde__m256_to_private(a); - return a_.m128[imm8]; -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf128_ps - #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[imm8]; -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf128_si256 - #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_floor_pd (simde__m256d a) { - return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_floor_pd - #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_floor_ps (simde__m256 a) { - return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_floor_ps - #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 31) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i8[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi8 - #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 15) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i16[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi16 - #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 7) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i32[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi32 - #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 3) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i64[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm256_insert_epi64 - #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256d_private a_ = simde__m256d_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - a_.m128d_private[imm8] = b_; - - return simde__m256d_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_pd - #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256_private a_ = simde__m256_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - 
a_.m128_private[imm8] = b_; - - return simde__m256_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_ps - #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - a_.m128i_private[imm8] = b_; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_si256 - #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) -#else -# define simde_mm256_dp_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ - simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_dp_ps - #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm256_extract_epi32 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 7) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i32[index]; -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi32 - #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm256_extract_epi64 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 3) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i64[index]; -} -#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) - #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) - #endif -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm256_extract_epi64 - #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_lddqu_si256 - #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_pd(mem_addr); - #else - simde__m256d r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); - 
return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_pd - #define _mm256_load_pd(a) simde_mm256_load_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_ps(mem_addr); - #else - simde__m256 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_ps - #define _mm256_load_ps(a) simde_mm256_load_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_load_si256 (simde__m256i const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_si256(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_si256 - #define _mm256_load_si256(a) simde_mm256_load_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_pd(a); - #else - simde__m256d r; - simde_memcpy(&r, a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_pd - #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_ps(a); - #else - simde__m256 r; - simde_memcpy(&r, a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_ps - #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi8 - #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) -#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi16 - #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi32(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi32 - #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi64 - #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_si256 (void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_si256 - #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - return _mm256_loadu2_m128(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), - simde_mm_loadu_ps(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128 - #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
[vendored-library removal collapsed for readability: this stretch of the diff deletes a large portion of the bundled SIMDe AVX compatibility header, namely the simde_mm_* / simde_mm256_* portability wrappers (loadu2_m128*, maskload/maskstore, min/max, movedup/movehdup/moveldup, movemask, mul, or, permute/permutevar/permute2f128, rcp, rsqrt, setr_*, shuffle, sqrt, store/storeu/storeu2/stream, sub/hsub, undefined, xor/xorsign/negate, unpackhi/unpacklo, zextps128/zextpd128/zextsi128, testc/testz/testnzc) together with their SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES #define blocks; only SIMDe code is removed in this span.]
-} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testnzc_si256 - #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX_H) */ -/* :: End simde/x86/avx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_vqrshlb_s8(int8_t a, int8_t b) { - int8_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vqrshlb_s8(a, b); - #else - if (b < -8) { - r = 0; - } else if (b < 0) { - r = HEDLEY_STATIC_CAST(int8_t, ((a + (1 << (-b - 1))) >> -b)); - } else if (b == 0) { - r = a; - } else if (b < 7) { - r = HEDLEY_STATIC_CAST(int8_t, a << b); - if ((r >> b) != a) { - r = (a < 0) ? INT8_MIN : INT8_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = (a < 0) ? INT8_MIN : INT8_MAX; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshlb_s8 - #define vqrshlb_s8(a, b) simde_vqrshlb_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vqrshlh_s16(int16_t a, int16_t b) { - int16_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vqrshlh_s16(a, b); - #else - int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); - - if (b8 <= -16) { - r = 0; - } else if (b8 < 0) { - r = HEDLEY_STATIC_CAST(int16_t, ((a + (1 << (-b8 - 1))) >> -b8)); - } else if (b8 == 0) { - r = a; - } else if (b8 < 15) { - r = HEDLEY_STATIC_CAST(int16_t, a << b8); - if ((r >> b8) != a) { - r = (a < 0) ? INT16_MIN : INT16_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = (a < 0) ? INT16_MIN : INT16_MAX; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshlh_s16 - #define vqrshlh_s16(a, b) simde_vqrshlh_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vqrshls_s32(int32_t a, int32_t b) { - int32_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vqrshls_s32(a, b); - #else - int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); - - if (b8 <= -32) { - r = 0; - } else if (b8 < 0) { - r = ((a + (1 << (-b8 - 1))) >> -b8); - } else if (b8 == 0) { - r = a; - } else if (b8 < 31) { - r = HEDLEY_STATIC_CAST(int32_t, a << b8); - if ((r >> b8) != a) { - r = (a < 0) ? INT32_MIN : INT32_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = (a < 0) ? INT32_MIN : INT32_MAX; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshls_s32 - #define vqrshls_s32(a, b) simde_vqrshls_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vqrshld_s64(int64_t a, int64_t b) { - int64_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vqrshld_s64(a, b); - #else - int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); - - if (b8 <= -64) { - r = 0; - } else if (b8 < 0) { - r = ((a + (INT64_C(1) << (-b8 - 1))) >> -b8); - } else if (b8 == 0) { - r = a; - } else if (b8 < 63) { - r = HEDLEY_STATIC_CAST(int64_t, a << b8); - if ((r >> b8) != a) { - r = (a < 0) ? INT64_MIN : INT64_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = (a < 0) ? 
INT64_MIN : INT64_MAX; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshld_s64 - #define vqrshld_s64(a, b) simde_vqrshld_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint8_t -simde_vqrshlb_u8(uint8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) - return vqrshlb_u8(a, HEDLEY_STATIC_CAST(uint8_t, b)); - #elif HEDLEY_HAS_WARNING("-Wsign-conversion") - /* https://github.com/llvm/llvm-project/commit/f0a78bdfdc6d56b25e0081884580b3960a3c2429 */ - HEDLEY_DIAGNOSTIC_PUSH - #pragma clang diagnostic ignored "-Wsign-conversion" - return vqrshlb_u8(a, b); - HEDLEY_DIAGNOSTIC_POP - #else - return vqrshlb_u8(a, b); - #endif - #else - uint8_t r; - - if (b < -8) { - r = 0; - } else if (b < 0) { - r = (a >> -b) + ((a >> (-b - 1)) & 1); - } else if (b == 0) { - r = a; - } else if (b < 7) { - r = HEDLEY_STATIC_CAST(uint8_t, a << b); - if ((r >> b) != a) { - r = UINT8_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = UINT8_MAX; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshlb_u8 - #define vqrshlb_u8(a, b) simde_vqrshlb_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vqrshlh_u16(uint16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) - return vqrshlh_u16(a, HEDLEY_STATIC_CAST(uint16_t, b)); - #elif HEDLEY_HAS_WARNING("-Wsign-conversion") - HEDLEY_DIAGNOSTIC_PUSH - #pragma clang diagnostic ignored "-Wsign-conversion" - return vqrshlh_u16(a, b); - HEDLEY_DIAGNOSTIC_POP - #else - return vqrshlh_u16(a, b); - #endif - #else - b = HEDLEY_STATIC_CAST(int8_t, b); - uint16_t r; - - if (b < -16) { - r = 0; - } else if (b < 0) { - r = (a >> -b) + ((a >> (-b - 1)) & 1); - } else if (b == 0) { - r = a; - } else if (b < 15) { - r = HEDLEY_STATIC_CAST(uint16_t, a << b); - if ((r >> b) != a) { - r = UINT16_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = UINT16_MAX; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshlh_u16 - #define vqrshlh_u16(a, b) simde_vqrshlh_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vqrshls_u32(uint32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) - return vqrshls_u32(a, HEDLEY_STATIC_CAST(uint16_t, b)); - #elif HEDLEY_HAS_WARNING("-Wsign-conversion") - HEDLEY_DIAGNOSTIC_PUSH - #pragma clang diagnostic ignored "-Wsign-conversion" - return vqrshls_u32(a, b); - HEDLEY_DIAGNOSTIC_POP - #else - return vqrshls_u32(a, b); - #endif - #else - b = HEDLEY_STATIC_CAST(int8_t, b); - uint32_t r; - - if (b < -32) { - r = 0; - } else if (b < 0) { - r = (a >> -b) + ((a >> (-b - 1)) & 1); - } else if (b == 0) { - r = a; - } else if (b < 31) { - r = HEDLEY_STATIC_CAST(uint32_t, a << b); - if ((r >> b) != a) { - r = UINT32_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = UINT32_MAX; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshls_u32 - #define vqrshls_u32(a, b) simde_vqrshls_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vqrshld_u64(uint64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) - return vqrshld_u64(a, HEDLEY_STATIC_CAST(uint16_t, b)); - #elif 
HEDLEY_HAS_WARNING("-Wsign-conversion") - HEDLEY_DIAGNOSTIC_PUSH - #pragma clang diagnostic ignored "-Wsign-conversion" - return vqrshld_u64(a, b); - HEDLEY_DIAGNOSTIC_POP - #else - return vqrshld_u64(a, b); - #endif - #else - b = HEDLEY_STATIC_CAST(int8_t, b); - uint64_t r; - - if (b < -64) { - r = 0; - } else if (b < 0) { - r = (a >> -b) + ((a >> (-b - 1)) & 1); - } else if (b == 0) { - r = a; - } else if (b < 63) { - r = HEDLEY_STATIC_CAST(uint64_t, a << b); - if ((r >> b) != a) { - r = UINT64_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = UINT64_MAX; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshld_u64 - #define vqrshld_u64(a, b) simde_vqrshld_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vqrshl_s8 (const simde_int8x8_t a, const simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshl_s8(a, b); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshlb_s8(a_.values[i], b_.values[i]); - } - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshl_s8 - #define vqrshl_s8(a, b) simde_vqrshl_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vqrshl_s16 (const simde_int16x4_t a, const simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshl_s16(a, b); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshlh_s16(a_.values[i], b_.values[i]); - } - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshl_s16 - #define vqrshl_s16(a, b) simde_vqrshl_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vqrshl_s32 (const simde_int32x2_t a, const simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshl_s32(a, b); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshls_s32(a_.values[i], b_.values[i]); - } - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshl_s32 - #define vqrshl_s32(a, b) simde_vqrshl_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vqrshl_s64 (const simde_int64x1_t a, const simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshl_s64(a, b); - #else - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshld_s64(a_.values[i], b_.values[i]); - } - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshl_s64 - #define vqrshl_s64(a, b) simde_vqrshl_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vqrshl_u8 (const simde_uint8x8_t a, const simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshl_u8(a, b); - #else - 
simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a); - simde_int8x8_private b_ = simde_int8x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshlb_u8(a_.values[i], b_.values[i]); - } - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshl_u8 - #define vqrshl_u8(a, b) simde_vqrshl_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vqrshl_u16 (const simde_uint16x4_t a, const simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshl_u16(a, b); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a); - simde_int16x4_private b_ = simde_int16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshlh_u16(a_.values[i], b_.values[i]); - } - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshl_u16 - #define vqrshl_u16(a, b) simde_vqrshl_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vqrshl_u32 (const simde_uint32x2_t a, const simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshl_u32(a, b); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a); - simde_int32x2_private b_ = simde_int32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshls_u32(a_.values[i], b_.values[i]); - } - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshl_u32 - #define vqrshl_u32(a, b) simde_vqrshl_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vqrshl_u64 (const simde_uint64x1_t a, const simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshl_u64(a, b); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a); - simde_int64x1_private b_ = simde_int64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshld_u64(a_.values[i], b_.values[i]); - } - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshl_u64 - #define vqrshl_u64(a, b) simde_vqrshl_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vqrshlq_s8 (const simde_int8x16_t a, const simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshlq_s8(a, b); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshlb_s8(a_.values[i], b_.values[i]); - } - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshlq_s8 - #define vqrshlq_s8(a, b) simde_vqrshlq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vqrshlq_s16 (const simde_int16x8_t a, const simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshlq_s16(a, b); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshlh_s16(a_.values[i], b_.values[i]); - } - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshlq_s16 - #define vqrshlq_s16(a, b) simde_vqrshlq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqrshlq_s32 (const simde_int32x4_t a, const simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshlq_s32(a, b); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshls_s32(a_.values[i], b_.values[i]); - } - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshlq_s32 - #define vqrshlq_s32(a, b) simde_vqrshlq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqrshlq_s64 (const simde_int64x2_t a, const simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshlq_s64(a, b); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshld_s64(a_.values[i], b_.values[i]); - } - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshlq_s64 - #define vqrshlq_s64(a, b) simde_vqrshlq_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vqrshlq_u8 (const simde_uint8x16_t a, const simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshlq_u8(a, b); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a); - simde_int8x16_private b_ = simde_int8x16_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshlb_u8(a_.values[i], b_.values[i]); - } - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshlq_u8 - #define vqrshlq_u8(a, b) simde_vqrshlq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vqrshlq_u16 (const simde_uint16x8_t a, const simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshlq_u16(a, b); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a); - simde_int16x8_private b_ = simde_int16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshlh_u16(a_.values[i], b_.values[i]); - } - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshlq_u16 - #define vqrshlq_u16(a, b) simde_vqrshlq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vqrshlq_u32 (const simde_uint32x4_t a, const simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshlq_u32(a, b); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a); - simde_int32x4_private b_ = simde_int32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshls_u32(a_.values[i], b_.values[i]); - } - - return simde_uint32x4_from_private(r_); - #endif -} -#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshlq_u32 - #define vqrshlq_u32(a, b) simde_vqrshlq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vqrshlq_u64 (const simde_uint64x2_t a, const simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqrshlq_u64(a, b); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a); - simde_int64x2_private b_ = simde_int64x2_to_private(b); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqrshld_u64(a_.values[i], b_.values[i]); - } - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshlq_u64 - #define vqrshlq_u64(a, b) simde_vqrshlq_u64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QRSHL_H) */ -/* :: End simde/arm/neon/qrshl.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qrshrn_high_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QRSHRN_HIGH_N_H) -#define SIMDE_ARM_NEON_QRSHRN_HIGH_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vqrshrn_high_n_s16(simde_int8x8_t r, simde_int16x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - int16_t tmp = HEDLEY_STATIC_CAST(int16_t, (a_.values[i] + (1 << (n - 1))) >> n); - if (tmp > INT8_MAX) tmp = INT8_MAX; - else if (tmp < INT8_MIN) tmp = INT8_MIN; - r_.values[i] = HEDLEY_STATIC_CAST(int8_t, tmp); - } - return simde_vcombine_s8(r, simde_vqmovn_s16(simde_int16x8_from_private(r_))); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrshrn_high_n_s16(r, a, n) vqrshrn_high_n_s16((r), (a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrn_high_n_s16 - #define vqrshrn_high_n_s16(r, a, n) simde_vqrshrn_high_n_s16((r), (a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vqrshrn_high_n_s32(simde_int16x4_t r, simde_int32x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - int32_t tmp = (a_.values[i] >> ((n == 32) ? 31 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << (n - 1))) != 0); - if (tmp > INT16_MAX) tmp = INT16_MAX; - else if (tmp < INT16_MIN) tmp = INT16_MIN; - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, tmp); - } - return simde_vcombine_s16(r, simde_vqmovn_s32(simde_int32x4_from_private(r_))); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrshrn_high_n_s32(r, a, n) vqrshrn_high_n_s32((r), (a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrn_high_n_s32 - #define vqrshrn_high_n_s32(r, a, n) simde_vqrshrn_high_n_s32((r), (a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqrshrn_high_n_s64(simde_int32x2_t r, simde_int64x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - int64_t tmp = (a_.values[i] >> ((n == 64) ? 
63 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << (n - 1))) != 0); - if (tmp > INT32_MAX) tmp = INT32_MAX; - else if (tmp < INT32_MIN) tmp = INT32_MIN; - r_.values[i] = HEDLEY_STATIC_CAST(int32_t, tmp); - } - return simde_vcombine_s32(r, simde_vqmovn_s64(simde_int64x2_from_private(r_))); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrshrn_high_n_s64(r, a, n) vqrshrn_high_n_s64((r), (a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrn_high_n_s64 - #define vqrshrn_high_n_s64(r, a, n) simde_vqrshrn_high_n_s64((r), (a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vqrshrn_high_n_u16(simde_uint8x8_t r, simde_uint16x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - uint16_t tmp = HEDLEY_STATIC_CAST(uint16_t, (a_.values[i] + (1 << (n - 1))) >> n); - if (tmp > UINT8_MAX) tmp = UINT8_MAX; - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, tmp); - } - return simde_vcombine_u8(r, simde_vqmovn_u16(simde_uint16x8_from_private(r_))); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrshrn_high_n_u16(r, a, n) vqrshrn_high_n_u16((r), (a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrn_high_n_u16 - #define vqrshrn_high_n_u16(r, a, n) simde_vqrshrn_high_n_u16((r), (a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vqrshrn_high_n_u32(simde_uint16x4_t r, simde_uint32x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - uint32_t tmp = (a_.values[i] >> ((n == 32) ? 31 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(uint32_t, UINT32_C(1) << (n - 1))) != 0); - if (tmp > UINT16_MAX) tmp = UINT16_MAX; - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, tmp); - } - return simde_vcombine_u16(r, simde_vqmovn_u32(simde_uint32x4_from_private(r_))); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrshrn_high_n_u32(r, a, n) vqrshrn_high_n_u32((r), (a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrn_high_n_u32 - #define vqrshrn_high_n_u32(r, a, n) simde_vqrshrn_high_n_u32((r), (a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vqrshrn_high_n_u64(simde_uint32x2_t r, simde_uint64x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - uint64_t tmp = (a_.values[i] >> ((n == 64) ? 
63 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(uint64_t, UINT64_C(1) << (n - 1))) != 0); - if (tmp > UINT32_MAX) tmp = UINT32_MAX; - r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, tmp); - } - return simde_vcombine_u32(r, simde_vqmovn_u64(simde_uint64x2_from_private(r_))); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrshrn_high_n_u64(r, a, n) vqrshrn_high_n_u64((r), (a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrn_high_n_u64 - #define vqrshrn_high_n_u64(r, a, n) simde_vqrshrn_high_n_u64((r), (a), (n)) -#endif - - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RSHRN_HIGH_N_H) */ -/* :: End simde/arm/neon/qrshrn_high_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qrshrn_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QRSHRN_N_H) -#define SIMDE_ARM_NEON_QRSHRN_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rshr_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RSHR_N_H) -#define SIMDE_ARM_NEON_RSHR_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/tst.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_TST_H) -#define SIMDE_ARM_NEON_TST_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vtstd_s64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint64_t, vtstd_s64(a, b)); - #else - return ((a & b) != 0) ? UINT64_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vtstd_s64 - #define vtstd_s64(a, b) simde_vtstd_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vtstd_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint64_t, vtstd_u64(a, b)); - #else - return ((a & b) != 0) ? UINT64_MAX : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vtstd_u64 - #define vtstd_u64(a, b) simde_vtstd_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vtstq_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtstq_s8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmvnq_u8(simde_vceqzq_s8(simde_vandq_s8(a, b))); - #else - simde_int8x16_private - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - simde_uint8x16_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_ne(wasm_v128_and(a_.v128, b_.v128), wasm_i8x16_splat(0)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT8_MAX : 0; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtstq_s8 - #define vtstq_s8(a, b) simde_vtstq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vtstq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtstq_s16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmvnq_u16(simde_vceqzq_s16(simde_vandq_s16(a, b))); - #else - simde_int16x8_private - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - simde_uint16x8_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_ne(wasm_v128_and(a_.v128, b_.v128), wasm_i16x8_splat(0)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT16_MAX : 0; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtstq_s16 - #define vtstq_s16(a, b) simde_vtstq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vtstq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtstq_s32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmvnq_u32(simde_vceqzq_s32(simde_vandq_s32(a, b))); - #else - simde_int32x4_private - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - simde_uint32x4_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_ne(wasm_v128_and(a_.v128, b_.v128), wasm_i32x4_splat(0)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT32_MAX : 0; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtstq_s32 - #define vtstq_s32(a, b) simde_vtstq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vtstq_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vtstq_s64(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vceqzq_u64(simde_vceqzq_s64(simde_vandq_s64(a, b))); - #else - simde_int64x2_private - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - simde_uint64x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vtstd_s64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vtstq_s64 - #define vtstq_s64(a, b) simde_vtstq_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vtstq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtstq_u8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmvnq_u8(simde_vceqzq_u8(simde_vandq_u8(a, b))); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_ne(wasm_v128_and(a_.v128, b_.v128), wasm_i8x16_splat(0)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT8_MAX : 0; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtstq_u8 - #define vtstq_u8(a, b) simde_vtstq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vtstq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtstq_u16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmvnq_u16(simde_vceqzq_u16(simde_vandq_u16(a, b))); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_ne(wasm_v128_and(a_.v128, b_.v128), wasm_i16x8_splat(0)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT16_MAX : 0; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtstq_u16 - #define vtstq_u16(a, b) simde_vtstq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vtstq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtstq_u32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmvnq_u32(simde_vceqzq_u32(simde_vandq_u32(a, b))); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_ne(wasm_v128_and(a_.v128, b_.v128), wasm_i32x4_splat(0)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT32_MAX : 0; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtstq_u32 - #define vtstq_u32(a, b) simde_vtstq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vtstq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vtstq_u64(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vceqzq_u64(simde_vceqzq_u64(simde_vandq_u64(a, b))); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vtstd_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vtstq_u64 - #define vtstq_u64(a, b) simde_vtstq_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vtst_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtst_s8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmvn_u8(simde_vceqz_s8(simde_vand_s8(a, b))); - #else - simde_int8x8_private - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - simde_uint8x8_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT8_MAX : 0; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtst_s8 - #define vtst_s8(a, b) simde_vtst_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vtst_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtst_s16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmvn_u16(simde_vceqz_s16(simde_vand_s16(a, b))); - #else - simde_int16x4_private - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - simde_uint16x4_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT16_MAX : 0; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtst_s16 - #define vtst_s16(a, b) simde_vtst_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vtst_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtst_s32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmvn_u32(simde_vceqz_s32(simde_vand_s32(a, b))); - #else - simde_int32x2_private - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - simde_uint32x2_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT32_MAX : 0; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtst_s32 - #define vtst_s32(a, b) simde_vtst_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vtst_s64(simde_int64x1_t a, simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vtst_s64(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vceqz_u64(simde_vceqz_s64(simde_vand_s64(a, b))); - #else - simde_int64x1_private - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b); - simde_uint64x1_private r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vtstd_s64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vtst_s64 - #define vtst_s64(a, b) simde_vtst_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vtst_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtst_u8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmvn_u8(simde_vceqz_u8(simde_vand_u8(a, b))); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT8_MAX : 0; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtst_u8 - #define vtst_u8(a, b) simde_vtst_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vtst_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtst_u16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmvn_u16(simde_vceqz_u16(simde_vand_u16(a, b))); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT16_MAX : 0; - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtst_u16 - #define vtst_u16(a, b) simde_vtst_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vtst_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtst_u32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmvn_u32(simde_vceqz_u32(simde_vand_u32(a, b))); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT32_MAX : 0; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtst_u32 - #define vtst_u32(a, b) simde_vtst_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vtst_u64(simde_uint64x1_t a, simde_uint64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vtst_u64(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vceqz_u64(simde_vceqz_u64(simde_vand_u64(a, b))); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vtstd_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vtst_u64 - #define vtst_u64(a, b) simde_vtst_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vtst_p8(simde_poly8x8_t a, simde_poly8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtst_p8(a, b); - #else - simde_poly8x8_private - a_ = simde_poly8x8_to_private(a), - b_ = simde_poly8x8_to_private(b); - simde_uint8x8_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT8_MAX : 0; - } - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtst_p8 - #define vtst_p8(a, b) simde_vtst_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vtst_p64(simde_poly64x1_t a, simde_poly64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vtst_p64(a, b); - #else - simde_poly64x1_private - a_ = simde_poly64x1_to_private(a), - b_ = simde_poly64x1_to_private(b); - simde_uint64x1_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT64_MAX : 0; - } - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vtst_p64 - #define vtst_p64(a, b) simde_vtst_p64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vtstq_p8(simde_poly8x16_t a, simde_poly8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtstq_p8(a, b); - #else - simde_poly8x16_private - a_ = simde_poly8x16_to_private(a), - b_ = simde_poly8x16_to_private(b); - simde_uint8x16_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT8_MAX : 0; - } - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtstq_p8 - #define vtstq_p8(a, b) simde_vtstq_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vtstq_p64(simde_poly64x2_t a, simde_poly64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vtstq_p64(a, b); - #else - simde_poly64x2_private - a_ = simde_poly64x2_to_private(a), - b_ = simde_poly64x2_to_private(b); - simde_uint64x2_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT64_MAX : 0; - } - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vtstq_p64 - #define vtstq_p64(a, b) simde_vtstq_p64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_TST_H) */ -/* :: End simde/arm/neon/tst.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_x_vrshrh_n_s16(int16_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { - return (a >> ((n == 16) ? 15 : n)) + ((a & HEDLEY_STATIC_CAST(int16_t, UINT16_C(1) << (n - 1))) != 0); -} - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_x_vrshrh_n_u16(uint16_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { - return ((n == 16) ? 0 : (a >> n)) + ((a & (UINT32_C(1) << (n - 1))) != 0); -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_x_vrshrs_n_s32(int32_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { - return (a >> ((n == 32) ? 31 : n)) + ((a & HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << (n - 1))) != 0); -} - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_x_vrshrs_n_u32(uint32_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { - return ((n == 32) ? 0 : (a >> n)) + ((a & (UINT32_C(1) << (n - 1))) != 0); -} - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vrshrd_n_s64(int64_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { - return (a >> ((n == 64) ? 63 : n)) + ((a & HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << (n - 1))) != 0); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vrshrd_n_s64(a, n) vrshrd_n_s64((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrshrd_n_s64 - #define vrshrd_n_s64(a, n) simde_vrshrd_n_s64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vrshrd_n_u64(uint64_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { - return ((n == 64) ? 
0 : (a >> n)) + ((a & (UINT64_C(1) << (n - 1))) != 0); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vrshrd_n_u64(a, n) vrshrd_n_u64((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrshrd_n_u64 - #define vrshrd_n_u64(a, n) simde_vrshrd_n_u64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vrshrq_n_s8 (const simde_int8x16_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (a_.values[i] + (1 << (n - 1))) >> n); - } - - return simde_int8x16_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrq_n_s8(a, n) vrshrq_n_s8((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshrq_n_s8(a, n) simde_vsubq_s8(simde_vshrq_n_s8((a), (n)), simde_vreinterpretq_s8_u8( \ - simde_vtstq_u8(simde_vreinterpretq_u8_s8(a), \ - simde_vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, 1 << ((n) - 1)))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrq_n_s8 - #define vrshrq_n_s8(a, n) simde_vrshrq_n_s8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vrshrq_n_s16 (const simde_int16x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (a_.values[i] + (1 << (n - 1))) >> n); - } - - return simde_int16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrq_n_s16(a, n) vrshrq_n_s16((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshrq_n_s16(a, n) simde_vsubq_s16(simde_vshrq_n_s16((a), (n)), simde_vreinterpretq_s16_u16( \ - simde_vtstq_u16(simde_vreinterpretq_u16_s16(a), \ - simde_vdupq_n_u16(HEDLEY_STATIC_CAST(uint16_t, 1 << ((n) - 1)))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrq_n_s16 - #define vrshrq_n_s16(a, n) simde_vrshrq_n_s16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vrshrq_n_s32 (const simde_int32x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >> ((n == 32) ? 
31 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << (n - 1))) != 0); - } - - return simde_int32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrq_n_s32(a, n) vrshrq_n_s32((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshrq_n_s32(a, n) simde_vsubq_s32(simde_vshrq_n_s32((a), (n)), \ - simde_vreinterpretq_s32_u32(simde_vtstq_u32(simde_vreinterpretq_u32_s32(a), \ - simde_vdupq_n_u32(UINT32_C(1) << ((n) - 1))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrq_n_s32 - #define vrshrq_n_s32(a, n) simde_vrshrq_n_s32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vrshrq_n_s64 (const simde_int64x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >> ((n == 64) ? 63 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << (n - 1))) != 0); - } - - return simde_int64x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrq_n_s64(a, n) vrshrq_n_s64((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshrq_n_s64(a, n) simde_vsubq_s64(simde_vshrq_n_s64((a), (n)), \ - simde_vreinterpretq_s64_u64(simde_vtstq_u64(simde_vreinterpretq_u64_s64(a), \ - simde_vdupq_n_u64(UINT64_C(1) << ((n) - 1))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrq_n_s64 - #define vrshrq_n_s64(a, n) simde_vrshrq_n_s64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vrshrq_n_u8 (const simde_uint8x16_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.values[i] + (1 << (n - 1))) >> n); - } - - return simde_uint8x16_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrq_n_u8(a, n) vrshrq_n_u8((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshrq_n_u8(a, n) simde_vsubq_u8(simde_vshrq_n_u8((a), (n)), \ - simde_vtstq_u8((a), simde_vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, 1 << ((n) - 1))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrq_n_u8 - #define vrshrq_n_u8(a, n) simde_vrshrq_n_u8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vrshrq_n_u16 (const simde_uint16x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.values[i] + (1 << (n - 1))) >> n); - } - - return simde_uint16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrq_n_u16(a, n) vrshrq_n_u16((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshrq_n_u16(a, n) simde_vsubq_u16(simde_vshrq_n_u16((a), (n)), \ - simde_vtstq_u16((a), simde_vdupq_n_u16(HEDLEY_STATIC_CAST(uint16_t, 1 << ((n) - 1))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrq_n_u16 - #define vrshrq_n_u16(a, n) simde_vrshrq_n_u16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vrshrq_n_u32 (const simde_uint32x4_t a, const int 
n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((n == 32) ? 0 : (a_.values[i] >> n)) + ((a_.values[i] & (UINT32_C(1) << (n - 1))) != 0); - } - - return simde_uint32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrq_n_u32(a, n) vrshrq_n_u32((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshrq_n_u32(a, n) simde_vsubq_u32(simde_vshrq_n_u32((a), (n)), \ - simde_vtstq_u32((a), simde_vdupq_n_u32(UINT32_C(1) << ((n) - 1)))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrq_n_u32 - #define vrshrq_n_u32(a, n) simde_vrshrq_n_u32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vrshrq_n_u64 (const simde_uint64x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((n == 64) ? 0 : (a_.values[i] >> n)) + ((a_.values[i] & (UINT64_C(1) << (n - 1))) != 0); - } - - return simde_uint64x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrq_n_u64(a, n) vrshrq_n_u64((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshrq_n_u64(a, n) simde_vsubq_u64(simde_vshrq_n_u64((a), (n)), \ - simde_vtstq_u64((a), simde_vdupq_n_u64(UINT64_C(1) << ((n) - 1)))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrq_n_u64 - #define vrshrq_n_u64(a, n) simde_vrshrq_n_u64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vrshr_n_s8 (const simde_int8x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (a_.values[i] + (1 << (n - 1))) >> n); - } - - return simde_int8x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshr_n_s8(a, n) vrshr_n_s8((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshr_n_s8(a, n) simde_vsub_s8(simde_vshr_n_s8((a), (n)), simde_vreinterpret_s8_u8( \ - simde_vtst_u8(simde_vreinterpret_u8_s8(a), \ - simde_vdup_n_u8(HEDLEY_STATIC_CAST(uint8_t, 1 << ((n) - 1)))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshr_n_s8 - #define vrshr_n_s8(a, n) simde_vrshr_n_s8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vrshr_n_s16 (const simde_int16x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (a_.values[i] + (1 << (n - 1))) >> n); - } - - return simde_int16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshr_n_s16(a, n) vrshr_n_s16((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshr_n_s16(a, n) simde_vsub_s16(simde_vshr_n_s16((a), (n)), simde_vreinterpret_s16_u16( \ - simde_vtst_u16(simde_vreinterpret_u16_s16(a), \ - simde_vdup_n_u16(HEDLEY_STATIC_CAST(uint16_t, 1 << ((n) - 1)))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshr_n_s16 - 
#define vrshr_n_s16(a, n) simde_vrshr_n_s16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vrshr_n_s32 (const simde_int32x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >> ((n == 32) ? 31 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << (n - 1))) != 0); - } - - return simde_int32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshr_n_s32(a, n) vrshr_n_s32((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshr_n_s32(a, n) simde_vsub_s32(simde_vshr_n_s32((a), (n)), \ - simde_vreinterpret_s32_u32(simde_vtst_u32(simde_vreinterpret_u32_s32(a), \ - simde_vdup_n_u32(UINT32_C(1) << ((n) - 1))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshr_n_s32 - #define vrshr_n_s32(a, n) simde_vrshr_n_s32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vrshr_n_s64 (const simde_int64x1_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] >> ((n == 64) ? 63 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << (n - 1))) != 0); - } - - return simde_int64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshr_n_s64(a, n) vrshr_n_s64((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshr_n_s64(a, n) simde_vsub_s64(simde_vshr_n_s64((a), (n)), \ - simde_vreinterpret_s64_u64(simde_vtst_u64(simde_vreinterpret_u64_s64(a), \ - simde_vdup_n_u64(UINT64_C(1) << ((n) - 1))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshr_n_s64 - #define vrshr_n_s64(a, n) simde_vrshr_n_s64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vrshr_n_u8 (const simde_uint8x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.values[i] + (1 << (n - 1))) >> n); - } - - return simde_uint8x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshr_n_u8(a, n) vrshr_n_u8((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshr_n_u8(a, n) simde_vsub_u8(simde_vshr_n_u8((a), (n)), \ - simde_vtst_u8((a), simde_vdup_n_u8(HEDLEY_STATIC_CAST(uint8_t, 1 << ((n) - 1))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshr_n_u8 - #define vrshr_n_u8(a, n) simde_vrshr_n_u8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vrshr_n_u16 (const simde_uint16x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.values[i] + (1 << (n - 1))) >> n); - } - - return simde_uint16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshr_n_u16(a, n) vrshr_n_u16((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshr_n_u16(a, n) 
simde_vsub_u16(simde_vshr_n_u16((a), (n)), \ - simde_vtst_u16((a), simde_vdup_n_u16(HEDLEY_STATIC_CAST(uint16_t, 1 << ((n) - 1))))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshr_n_u16 - #define vrshr_n_u16(a, n) simde_vrshr_n_u16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vrshr_n_u32 (const simde_uint32x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((n == 32) ? 0 : (a_.values[i] >> n)) + ((a_.values[i] & (UINT32_C(1) << (n - 1))) != 0); - } - - return simde_uint32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshr_n_u32(a, n) vrshr_n_u32((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshr_n_u32(a, n) simde_vsub_u32(simde_vshr_n_u32((a), (n)), \ - simde_vtst_u32((a), simde_vdup_n_u32(UINT32_C(1) << ((n) - 1)))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshr_n_u32 - #define vrshr_n_u32(a, n) simde_vrshr_n_u32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vrshr_n_u64 (const simde_uint64x1_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((n == 64) ? 0 : (a_.values[i] >> n)) + ((a_.values[i] & (UINT64_C(1) << (n - 1))) != 0); - } - - return simde_uint64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshr_n_u64(a, n) vrshr_n_u64((a), (n)) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_vrshr_n_u64(a, n) simde_vsub_u64(simde_vshr_n_u64((a), (n)), \ - simde_vtst_u64((a), simde_vdup_n_u64(UINT64_C(1) << ((n) - 1)))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshr_n_u64 - #define vrshr_n_u64(a, n) simde_vrshr_n_u64((a), (n)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RSHR_N_H) */ -/* :: End simde/arm/neon/rshr_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrshrnh_n_s16(a, n) vqrshrnh_n_s16(a, n) -#else - #define simde_vqrshrnh_n_s16(a, n) simde_vqmovnh_s16(simde_x_vrshrh_n_s16(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrnh_n_s16 - #define vqrshrnh_n_s16(a, n) simde_vqrshrnh_n_s16(a, n) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrshrnh_n_u16(a, n) vqrshrnh_n_u16(a, n) -#else - #define simde_vqrshrnh_n_u16(a, n) simde_vqmovnh_u16(simde_x_vrshrh_n_u16(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrnh_n_u16 - #define vqrshrnh_n_u16(a, n) simde_vqrshrnh_n_u16(a, n) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrshrns_n_s32(a, n) vqrshrns_n_s32(a, n) -#else - #define simde_vqrshrns_n_s32(a, n) simde_vqmovns_s32(simde_x_vrshrs_n_s32(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrns_n_s32 - #define vqrshrns_n_s32(a, n) simde_vqrshrns_n_s32(a, n) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrshrns_n_u32(a, n) vqrshrns_n_u32(a, n) -#else 
- #define simde_vqrshrns_n_u32(a, n) simde_vqmovns_u32(simde_x_vrshrs_n_u32(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrns_n_u32 - #define vqrshrns_n_u32(a, n) simde_vqrshrns_n_u32(a, n) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrshrnd_n_s64(a, n) vqrshrnd_n_s64(a, n) -#else - #define simde_vqrshrnd_n_s64(a, n) simde_vqmovnd_s64(simde_vrshrd_n_s64(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrnd_n_s64 - #define vqrshrnd_n_s64(a, n) simde_vqrshrnd_n_s64(a, n) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqrshrnd_n_u64(a, n) vqrshrnd_n_u64(a, n) -#else - #define simde_vqrshrnd_n_u64(a, n) simde_vqmovnd_u64(simde_vrshrd_n_u64(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrnd_n_u64 - #define vqrshrnd_n_u64(a, n) simde_vqrshrnd_n_u64(a, n) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqrshrn_n_s16(a, n) vqrshrn_n_s16((a), (n)) -#else - #define simde_vqrshrn_n_s16(a, n) simde_vqmovn_s16(simde_vrshrq_n_s16(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshrn_n_s16 - #define vqrshrn_n_s16(a, n) simde_vqrshrn_n_s16((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqrshrn_n_s32(a, n) vqrshrn_n_s32((a), (n)) -#else - #define simde_vqrshrn_n_s32(a, n) simde_vqmovn_s32(simde_vrshrq_n_s32(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshrn_n_s32 - #define vqrshrn_n_s32(a, n) simde_vqrshrn_n_s32((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqrshrn_n_s64(a, n) vqrshrn_n_s64((a), (n)) -#else - #define simde_vqrshrn_n_s64(a, n) simde_vqmovn_s64(simde_vrshrq_n_s64(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshrn_n_s64 - #define vqrshrn_n_s64(a, n) simde_vqrshrn_n_s64((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqrshrn_n_u16(a, n) vqrshrn_n_u16((a), (n)) -#else - #define simde_vqrshrn_n_u16(a, n) simde_vqmovn_u16(simde_vrshrq_n_u16(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshrn_n_u16 - #define vqrshrn_n_u16(a, n) simde_vqrshrn_n_u16((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqrshrn_n_u32(a, n) vqrshrn_n_u32((a), (n)) -#else - #define simde_vqrshrn_n_u32(a, n) simde_vqmovn_u32(simde_vrshrq_n_u32(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshrn_n_u32 - #define vqrshrn_n_u32(a, n) simde_vqrshrn_n_u32((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqrshrn_n_u64(a, n) vqrshrn_n_u64((a), (n)) -#else - #define simde_vqrshrn_n_u64(a, n) simde_vqmovn_u64(simde_vrshrq_n_u64(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshrn_n_u64 - #define vqrshrn_n_u64(a, n) simde_vqrshrn_n_u64((a), (n)) -#endif - - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QRSHRN_N_H) */ -/* :: End simde/arm/neon/qrshrn_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qrshrun_high_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including 
without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QRSHRUN_HIGH_N_H) -#define SIMDE_ARM_NEON_QRSHRUN_HIGH_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vqrshrun_high_n_s16(simde_uint8x8_t r, simde_int16x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_uint16x8_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - int16_t tmp = HEDLEY_STATIC_CAST(int16_t, (a_.values[i] + (1 << (n - 1))) >> n); - if (tmp > UINT8_MAX) tmp = UINT8_MAX; - else if (tmp < 0) tmp = 0; - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, tmp); - } - return simde_vcombine_u8(r, simde_vqmovn_u16(simde_uint16x8_from_private(r_))); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(__clang__) - #define simde_vqrshrun_high_n_s16(r, a, n) vqrshrun_high_n_s16((r), (a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrun_high_n_s16 - #define vqrshrun_high_n_s16(r, a, n) simde_vqrshrun_high_n_s16((r), (a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vqrshrun_high_n_s32(simde_uint16x4_t r, simde_int32x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { - simde_int32x4_private a_ = simde_int32x4_to_private(a); - simde_uint32x4_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - int32_t tmp = (a_.values[i] >> ((n == 32) ? 
31 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << (n - 1))) != 0); - if (tmp > UINT16_MAX) tmp = UINT16_MAX; - else if (tmp < 0) tmp = 0; - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, tmp); - } - return simde_vcombine_u16(r, simde_vqmovn_u32(simde_uint32x4_from_private(r_))); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(__clang__) - #define simde_vqrshrun_high_n_s32(r, a, n) vqrshrun_high_n_s32((r), (a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrun_high_n_s32 - #define vqrshrun_high_n_s32(r, a, n) simde_vqrshrun_high_n_s32((r), (a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vqrshrun_high_n_s64(simde_uint32x2_t r, simde_int64x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { - simde_int64x2_private a_ = simde_int64x2_to_private(a); - simde_uint64x2_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - int64_t tmp = (a_.values[i] >> ((n == 64) ? 63 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << (n - 1))) != 0); - if (tmp > UINT32_MAX) tmp = UINT32_MAX; - else if (tmp < 0) tmp = 0; - r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, tmp); - } - return simde_vcombine_u32(r, simde_vqmovn_u64(simde_uint64x2_from_private(r_))); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(__clang__) - #define simde_vqrshrun_high_n_s64(r, a, n) vqrshrun_high_n_s64((r), (a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrun_high_n_s64 - #define vqrshrun_high_n_s64(r, a, n) simde_vqrshrun_high_n_s64((r), (a), (n)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QRSHRUN_HIGH_N_H) */ -/* :: End simde/arm/neon/qrshrun_high_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qrshrun_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QRSHRUN_N_H) -#define SIMDE_ARM_NEON_QRSHRUN_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qmovun.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - */ - -#if !defined(SIMDE_ARM_NEON_QMOVUN_H) -#define SIMDE_ARM_NEON_QMOVUN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -uint8_t -simde_vqmovunh_s16(int16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint8_t, vqmovunh_s16(a)); - #else - return (a > UINT8_MAX) ? UINT8_MAX : ((a < 0) ? 0 : HEDLEY_STATIC_CAST(uint8_t, a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqmovunh_s16 - #define vqmovunh_s16(a) simde_vqmovunh_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vqmovuns_s32(int32_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint16_t, vqmovuns_s32(a)); - #else - return (a > UINT16_MAX) ? UINT16_MAX : ((a < 0) ? 0 : HEDLEY_STATIC_CAST(uint16_t, a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqmovuns_s32 - #define vqmovuns_s32(a) simde_vqmovuns_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vqmovund_s64(int64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(uint32_t, vqmovund_s64(a)); - #else - return (a > UINT32_MAX) ? UINT32_MAX : ((a < 0) ? 
0 : HEDLEY_STATIC_CAST(uint32_t, a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqmovund_s64 - #define vqmovund_s64(a) simde_vqmovund_s64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vqmovun_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqmovun_s16(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmovn_u16(simde_vreinterpretq_u16_s16(simde_vmaxq_s16(simde_vdupq_n_s16(0), simde_vminq_s16(simde_vdupq_n_s16(UINT8_MAX), a)))); - #else - simde_uint8x8_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqmovunh_s16(a_.values[i]); - } - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqmovun_s16 - #define vqmovun_s16(a) simde_vqmovun_s16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vqmovun_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqmovun_s32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmovn_u32(simde_vreinterpretq_u32_s32(simde_vmaxq_s32(simde_vdupq_n_s32(0), simde_vminq_s32(simde_vdupq_n_s32(UINT16_MAX), a)))); - #else - simde_uint16x4_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqmovuns_s32(a_.values[i]); - } - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqmovun_s32 - #define vqmovun_s32(a) simde_vqmovun_s32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vqmovun_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqmovun_s64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE > 0 - return simde_vmovn_u64(simde_vreinterpretq_u64_s64(simde_x_vmaxq_s64(simde_vdupq_n_s64(0), simde_x_vminq_s64(simde_vdupq_n_s64(UINT32_MAX), a)))); - #else - simde_uint32x2_private r_; - simde_int64x2_private a_ = simde_int64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqmovund_s64(a_.values[i]); - } - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqmovun_s64 - #define vqmovun_s64(a) simde_vqmovun_s64((a)) -#endif - - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QMOVUN_H) */ -/* :: End simde/arm/neon/qmovun.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_71751) - #define simde_vqrshruns_n_s32(a, n) HEDLEY_STATIC_CAST(uint16_t, vqrshruns_n_s32((a), (n))) - #else - #define simde_vqrshruns_n_s32(a, n) vqrshruns_n_s32((a), (n)) - #endif -#else - #define simde_vqrshruns_n_s32(a, n) simde_vqmovuns_s32(simde_x_vrshrs_n_s32(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshruns_n_s32 - #define vqrshruns_n_s32(a, n) simde_vqrshruns_n_s32((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_71751) - #define simde_vqrshrund_n_s64(a, n) HEDLEY_STATIC_CAST(uint32_t, vqrshrund_n_s64((a), (n))) - #else - #define simde_vqrshrund_n_s64(a, n) vqrshrund_n_s64((a), (n)) - #endif -#else - #define simde_vqrshrund_n_s64(a, n) 
simde_vqmovund_s64(simde_vrshrd_n_s64((a), (n))) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrund_n_s64 - #define vqrshrund_n_s64(a, n) simde_vqrshrund_n_s64((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_71751) - #define simde_vqrshrunh_n_s16(a, n) HEDLEY_STATIC_CAST(uint8_t, vqrshrunh_n_s16((a), (n))) - #else - #define simde_vqrshrunh_n_s16(a, n) vqrshrunh_n_s16((a), (n)) - #endif -#else - #define simde_vqrshrunh_n_s16(a, n) simde_vqmovunh_s16(simde_x_vrshrh_n_s16(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqrshrunh_n_s16 - #define vqrshrunh_n_s16(a, n) simde_vqrshrunh_n_s16((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqrshrun_n_s16(a, n) vqrshrun_n_s16((a), (n)) -#else - #define simde_vqrshrun_n_s16(a, n) simde_vqmovun_s16(simde_vrshrq_n_s16(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshrun_n_s16 - #define vqrshrun_n_s16(a, n) simde_vqrshrun_n_s16((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqrshrun_n_s32(a, n) vqrshrun_n_s32((a), (n)) -#else - #define simde_vqrshrun_n_s32(a, n) simde_vqmovun_s32(simde_vrshrq_n_s32(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshrun_n_s32 - #define vqrshrun_n_s32(a, n) simde_vqrshrun_n_s32((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqrshrun_n_s64(a, n) vqrshrun_n_s64((a), (n)) -#else - #define simde_vqrshrun_n_s64(a, n) simde_vqmovun_s64(simde_vrshrq_n_s64(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqrshrun_n_s64 - #define vqrshrun_n_s64(a, n) simde_vqrshrun_n_s64((a), (n)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QRSHRUN_N_H) */ -/* :: End simde/arm/neon/qrshrun_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qmovn_high.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - */ - -#if !defined(SIMDE_ARM_NEON_QMOVN_HIGH_H) -#define SIMDE_ARM_NEON_QMOVN_HIGH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vqmovn_high_s16(simde_int8x8_t r, simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqmovn_high_s16(r, a); - #else - return simde_vcombine_s8(r, simde_vqmovn_s16(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqmovn_high_s16 - #define vqmovn_high_s16(r, a) simde_vqmovn_high_s16((r), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vqmovn_high_s32(simde_int16x4_t r, simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqmovn_high_s32(r, a); - #else - return simde_vcombine_s16(r, simde_vqmovn_s32(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqmovn_high_s32 - #define vqmovn_high_s32(r, a) simde_vqmovn_high_s32((r), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqmovn_high_s64(simde_int32x2_t r, simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqmovn_high_s64(r, a); - #else - return simde_vcombine_s32(r, simde_vqmovn_s64(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqmovn_high_s64 - #define vqmovn_high_s64(r, a) simde_vqmovn_high_s64((r), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vqmovn_high_u16(simde_uint8x8_t r, simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqmovn_high_u16(r, a); - #else - return simde_vcombine_u8(r, simde_vqmovn_u16(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqmovn_high_u16 - #define vqmovn_high_u16(r, a) simde_vqmovn_high_u16((r), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vqmovn_high_u32(simde_uint16x4_t r, simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqmovn_high_u32(r, a); - #else - return simde_vcombine_u16(r, simde_vqmovn_u32(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqmovn_high_u32 - #define vqmovn_high_u32(r, a) simde_vqmovn_high_u32((r), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vqmovn_high_u64(simde_uint32x2_t r, simde_uint64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqmovn_high_u64(r, a); - #else - return simde_vcombine_u32(r, simde_vqmovn_u64(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqmovn_high_u64 - #define vqmovn_high_u64(r, a) simde_vqmovn_high_u64((r), (a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QMOVN_HIGH_H) */ -/* :: End simde/arm/neon/qmovn_high.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qmovun_high.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this 
software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QMOVUN_HIGH_H) -#define SIMDE_ARM_NEON_QMOVUN_HIGH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vqmovun_high_s16(simde_uint8x8_t r, simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqmovun_high_s16(r, a); - #else - return simde_vcombine_u8(r, simde_vqmovun_s16(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqmovun_high_s16 - #define vqmovun_high_s16(r, a) simde_vqmovun_high_s16((r), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vqmovun_high_s32(simde_uint16x4_t r, simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqmovun_high_s32(r, a); - #else - return simde_vcombine_u16(r, simde_vqmovun_s32(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqmovun_high_s32 - #define vqmovun_high_s32(r, a) simde_vqmovun_high_s32((r), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vqmovun_high_s64(simde_uint32x2_t r, simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqmovun_high_s64(r, a); - #else - return simde_vcombine_u32(r, simde_vqmovun_s64(a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqmovun_high_s64 - #define vqmovun_high_s64(r, a) simde_vqmovun_high_s64((r), (a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QMOVUN_HIGH_H) */ -/* :: End simde/arm/neon/qmovun_high.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qneg.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, 
subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_NEON_QNEG_H) -#define SIMDE_ARM_NEON_QNEG_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if !defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE) || 1 -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_vqnegb_s8(int8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqnegb_s8(a); - #else - return a == INT8_MIN ? INT8_MAX : -a; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqnegb_s8 - #define vqnegb_s8(a) simde_vqnegb_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vqnegh_s16(int16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqnegh_s16(a); - #else - return a == INT16_MIN ? INT16_MAX : -a; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqnegh_s16 - #define vqnegh_s16(a) simde_vqnegh_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vqnegs_s32(int32_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqnegs_s32(a); - #else - return a == INT32_MIN ? INT32_MAX : -a; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqnegs_s32 - #define vqnegs_s32(a) simde_vqnegs_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vqnegd_s64(int64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqnegd_s64(a); - #else - return a == INT64_MIN ? INT64_MAX : -a; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqnegd_s64 - #define vqnegd_s64(a) simde_vqnegd_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vqneg_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqneg_s8(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) - return simde_vneg_s8(simde_vmax_s8(a, simde_vdup_n_s8(INT8_MIN + 1))); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] == INT8_MIN) ? 
INT8_MAX : -(a_.values[i]); - } - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqneg_s8 - #define vqneg_s8(a) simde_vqneg_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vqneg_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqneg_s16(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) - return simde_vneg_s16(simde_vmax_s16(a, simde_vdup_n_s16(INT16_MIN + 1))); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] == INT16_MIN) ? INT16_MAX : -(a_.values[i]); - } - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqneg_s16 - #define vqneg_s16(a) simde_vqneg_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vqneg_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqneg_s32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) - return simde_vneg_s32(simde_vmax_s32(a, simde_vdup_n_s32(INT32_MIN + 1))); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] == INT32_MIN) ? INT32_MAX : -(a_.values[i]); - } - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqneg_s32 - #define vqneg_s32(a) simde_vqneg_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vqneg_s64(simde_int64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqneg_s64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vneg_s64(simde_x_vmax_s64(a, simde_vdup_n_s64(INT64_MIN + 1))); - #else - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] == INT64_MIN) ? INT64_MAX : -(a_.values[i]); - } - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqneg_s64 - #define vqneg_s64(a) simde_vqneg_s64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vqnegq_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqnegq_s8(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vnegq_s8(simde_vmaxq_s8(a, simde_vdupq_n_s8(INT8_MIN + 1))); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] == INT8_MIN) ? INT8_MAX : -(a_.values[i]); - } - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqnegq_s8 - #define vqnegq_s8(a) simde_vqnegq_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vqnegq_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqnegq_s16(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vnegq_s16(simde_vmaxq_s16(a, simde_vdupq_n_s16(INT16_MIN + 1))); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] == INT16_MIN) ? 
INT16_MAX : -(a_.values[i]); - } - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqnegq_s16 - #define vqnegq_s16(a) simde_vqnegq_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqnegq_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqnegq_s32(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vnegq_s32(simde_vmaxq_s32(a, simde_vdupq_n_s32(INT32_MIN + 1))); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] == INT32_MIN) ? INT32_MAX : -(a_.values[i]); - } - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqnegq_s32 - #define vqnegq_s32(a) simde_vqnegq_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqnegq_s64(simde_int64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqnegq_s64(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vnegq_s64(simde_x_vmaxq_s64(a, simde_vdupq_n_s64(INT64_MIN + 1))); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] == INT64_MIN) ? INT64_MAX : -(a_.values[i]); - } - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqnegq_s64 - #define vqnegq_s64(a) simde_vqnegq_s64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QNEG_H) */ -/* :: End simde/arm/neon/qneg.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qsub.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_NEON_QSUB_H) -#define SIMDE_ARM_NEON_QSUB_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_vqsubb_s8(int8_t a, int8_t b) { - return simde_math_subs_i8(a, b); -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqsubb_s8 - #define vqsubb_s8(a, b) simde_vqsubb_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vqsubh_s16(int16_t a, int16_t b) { - return simde_math_subs_i16(a, b); -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqsubh_s16 - #define vqsubh_s16(a, b) simde_vqsubh_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vqsubs_s32(int32_t a, int32_t b) { - return simde_math_subs_i32(a, b); -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqsubs_s32 - #define vqsubs_s32(a, b) simde_vqsubs_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vqsubd_s64(int64_t a, int64_t b) { - return simde_math_subs_i64(a, b); -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqsubd_s64 - #define vqsubd_s64(a, b) simde_vqsubd_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint8_t -simde_vqsubb_u8(uint8_t a, uint8_t b) { - return simde_math_subs_u8(a, b); -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqsubb_u8 - #define vqsubb_u8(a, b) simde_vqsubb_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vqsubh_u16(uint16_t a, uint16_t b) { - return simde_math_subs_u16(a, b); -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqsubh_u16 - #define vqsubh_u16(a, b) simde_vqsubh_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vqsubs_u32(uint32_t a, uint32_t b) { - return simde_math_subs_u32(a, b); -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqsubs_u32 - #define vqsubs_u32(a, b) simde_vqsubs_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vqsubd_u64(uint64_t a, uint64_t b) { - return simde_math_subs_u64(a, b); -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqsubd_u64 - #define vqsubd_u64(a, b) simde_vqsubd_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vqsub_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsub_s8(a, b); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_subs_pi8(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT8_MAX); - const __typeof__(r_.values) diff = a_.values - b_.values; - const __typeof__(r_.values) saturate = diff_sat ^ diff; - const __typeof__(r_.values) m = saturate 
>> 7; - r_.values = (diff_sat & m) | (diff & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubb_s8(a_.values[i], b_.values[i]); - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsub_s8 - #define vqsub_s8(a, b) simde_vqsub_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vqsub_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsub_s16(a, b); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_subs_pi16(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT16_MAX); - const __typeof__(r_.values) diff = a_.values - b_.values; - const __typeof__(r_.values) saturate = diff_sat ^ diff; - const __typeof__(r_.values) m = saturate >> 15; - r_.values = (diff_sat & m) | (diff & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubh_s16(a_.values[i], b_.values[i]); - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsub_s16 - #define vqsub_s16(a, b) simde_vqsub_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vqsub_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsub_s32(a, b); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT32_MAX); - const __typeof__(r_.values) diff = a_.values - b_.values; - const __typeof__(r_.values) saturate = diff_sat ^ diff; - const __typeof__(r_.values) m = saturate >> 31; - r_.values = (diff_sat & m) | (diff & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubs_s32(a_.values[i], b_.values[i]); - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsub_s32 - #define vqsub_s32(a, b) simde_vqsub_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vqsub_s64(simde_int64x1_t a, simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsub_s64(a, b); - #else - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT64_MAX); - const __typeof__(r_.values) diff = a_.values - b_.values; - const __typeof__(r_.values) saturate = diff_sat ^ diff; - const __typeof__(r_.values) m = saturate >> 63; - r_.values = (diff_sat & m) | (diff & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubd_s64(a_.values[i], b_.values[i]); - } - #endif - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsub_s64 - 
#define vqsub_s64(a, b) simde_vqsub_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vqsub_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsub_u8(a, b); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_subs_pu8(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values - b_.values; - r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubb_u8(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsub_u8 - #define vqsub_u8(a, b) simde_vqsub_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vqsub_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsub_u16(a, b); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_subs_pu16(a_.m64, b_.m64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values - b_.values; - r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubh_u16(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsub_u16 - #define vqsub_u16(a, b) simde_vqsub_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vqsub_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsub_u32(a, b); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values - b_.values; - r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubs_u32(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsub_u32 - #define vqsub_u32(a, b) simde_vqsub_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vqsub_u64(simde_uint64x1_t a, simde_uint64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsub_u64(a, b); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a), - b_ = simde_uint64x1_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values - b_.values; - r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubd_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsub_u64 - #define vqsub_u64(a, b) simde_vqsub_u64((a), (b)) -#endif - 
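For reference, the vqsub_* fallbacks removed above all reduce to two lane-level tricks: a branchless signed saturating subtract (the diff_sat / diff / saturate / mask sequence) and an unsigned subtract that zeroes a lane when it wraps. The following is a minimal standalone C++ sketch of those two rules for a single int8/uint8 lane; the file and function names are illustrative only and are not part of this package's sources.

    // saturating_sub_sketch.cpp -- illustrative sketch only, not package code.
    #include <cstdint>
    #include <cstdio>

    // One lane of the branchless signed path deleted above: choose the
    // saturation target from the operand order, take the wrapping difference,
    // and saturate when the two disagree in sign (diff_sat / diff / saturate / m
    // in the vector code).
    static int8_t sat_sub_i8(int8_t a, int8_t b) {
        int sat  = (b > a) ? INT8_MIN : INT8_MAX;    // value used if the lane overflows
        int diff = (int8_t)(a - b);                  // wrapping 8-bit difference (modular on mainstream targets)
        bool overflow = ((sat ^ diff) & 0x80) != 0;  // sign bits disagree -> true result was out of range
        return (int8_t)(overflow ? sat : diff);
    }

    // One lane of the unsigned path deleted above:
    // "r = a - b; r &= (r <= a);" zeroes the lane when the subtraction wrapped.
    static uint8_t sat_sub_u8(uint8_t a, uint8_t b) {
        uint8_t r = (uint8_t)(a - b);
        return (r <= a) ? r : (uint8_t)0;
    }

    int main() {
        std::printf("%d %d %d\n", sat_sub_i8(-100, 100), sat_sub_i8(100, -100), sat_sub_u8(3, 10));
        // prints: -128 127 0
        return 0;
    }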
-SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vqsubq_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsubq_s8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_subs(a, b); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_sub_sat(a_.v128, b_.v128); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_subs_epi8(a_.m128i, b_.m128i); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT8_MAX); - const __typeof__(r_.values) diff = a_.values - b_.values; - const __typeof__(r_.values) saturate = diff_sat ^ diff; - const __typeof__(r_.values) m = saturate >> 7; - r_.values = (diff_sat & m) | (diff & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubb_s8(a_.values[i], b_.values[i]); - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsubq_s8 - #define vqsubq_s8(a, b) simde_vqsubq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vqsubq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsubq_s16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_subs(a, b); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i16x8_sub_sat(a_.v128, b_.v128); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_subs_epi16(a_.m128i, b_.m128i); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT16_MAX); - const __typeof__(r_.values) diff = a_.values - b_.values; - const __typeof__(r_.values) saturate = diff_sat ^ diff; - const __typeof__(r_.values) m = saturate >> 15; - r_.values = (diff_sat & m) | (diff & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubh_s16(a_.values[i], b_.values[i]); - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsubq_s16 - #define vqsubq_s16(a, b) simde_vqsubq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqsubq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsubq_s32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_subs(a, b); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - const __m128i diff_sat = _mm_xor_si128(_mm_set1_epi32(INT32_MAX), _mm_cmpgt_epi32(b_.m128i, a_.m128i)); - const __m128i diff = _mm_sub_epi32(a_.m128i, b_.m128i); - - const __m128i t = _mm_xor_si128(diff_sat, diff); - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128i = - _mm_castps_si128( - _mm_blendv_ps( - _mm_castsi128_ps(diff), - _mm_castsi128_ps(diff_sat), - _mm_castsi128_ps(t) - ) - ); - #else - r_.m128i = _mm_xor_si128(diff, _mm_and_si128(t, _mm_srai_epi32(t, 31))); - #endif - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - const __typeof__(r_.values) diff_sat = 
HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT32_MAX); - const __typeof__(r_.values) diff = a_.values - b_.values; - const __typeof__(r_.values) saturate = diff_sat ^ diff; - const __typeof__(r_.values) m = saturate >> 31; - r_.values = (diff_sat & m) | (diff & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubs_s32(a_.values[i], b_.values[i]); - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsubq_s32 - #define vqsubq_s32(a, b) simde_vqsubq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqsubq_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsubq_s64(a, b); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT64_MAX); - const __typeof__(r_.values) diff = a_.values - b_.values; - const __typeof__(r_.values) saturate = diff_sat ^ diff; - const __typeof__(r_.values) m = saturate >> 63; - r_.values = (diff_sat & m) | (diff & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubd_s64(a_.values[i], b_.values[i]); - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsubq_s64 - #define vqsubq_s64(a, b) simde_vqsubq_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vqsubq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsubq_u8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_subs(a, b); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u8x16_sub_sat(a_.v128, b_.v128); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_subs_epu8(a_.m128i, b_.m128i); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values - b_.values; - r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values <= a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubb_u8(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsubq_u8 - #define vqsubq_u8(a, b) simde_vqsubq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vqsubq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsubq_u16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_subs(a, b); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u16x8_sub_sat(a_.v128, b_.v128); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_subs_epu16(a_.m128i, b_.m128i); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values - b_.values; - r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values <= a_.values); - #else - SIMDE_VECTORIZE - for (size_t i = 
0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubh_u16(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsubq_u16 - #define vqsubq_u16(a, b) simde_vqsubq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vqsubq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsubq_u32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_subs(a, b); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - const __m128i i32_min = _mm_set1_epi32(INT32_MIN); - const __m128i difference = _mm_sub_epi32(a_.m128i, b_.m128i); - r_.m128i = - _mm_and_si128( - difference, - _mm_xor_si128( - _mm_cmpgt_epi32( - _mm_xor_si128(difference, i32_min), - _mm_xor_si128(a_.m128i, i32_min) - ), - _mm_set1_epi32(~INT32_C(0)) - ) - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values - b_.values; - r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values - b_.values; - r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubs_u32(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsubq_u32 - #define vqsubq_u32(a, b) simde_vqsubq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vqsubq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqsubq_u64(a, b); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = a_.values - b_.values; - r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqsubd_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqsubq_u64 - #define vqsubq_u64(a, b) simde_vqsubq_u64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QSUB_H) */ -/* :: End simde/arm/neon/qsub.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qshl.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QSHL_H) -#define SIMDE_ARM_NEON_QSHL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_vqshlb_s8(int8_t a, int8_t b) { - int8_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vqshlb_s8(a, b); - #else - if (b < -7) - b = -7; - - if (b <= 0) { - r = a >> -b; - } else if (b < 7) { - r = HEDLEY_STATIC_CAST(int8_t, a << b); - if ((r >> b) != a) { - r = (a < 0) ? INT8_MIN : INT8_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = (a < 0) ? INT8_MIN : INT8_MAX; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshlb_s8 - #define vqshlb_s8(a, b) simde_vqshlb_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vqshlh_s16(int16_t a, int16_t b) { - int16_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vqshlh_s16(a, b); - #else - int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); - - if (b8 < -15) - b8 = -15; - - if (b8 <= 0) { - r = a >> -b8; - } else if (b8 < 15) { - r = HEDLEY_STATIC_CAST(int16_t, a << b8); - if ((r >> b8) != a) { - r = (a < 0) ? INT16_MIN : INT16_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = (a < 0) ? INT16_MIN : INT16_MAX; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshlh_s16 - #define vqshlh_s16(a, b) simde_vqshlh_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vqshls_s32(int32_t a, int32_t b) { - int32_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vqshls_s32(a, b); - #else - int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); - - if (b8 < -31) - b8 = -31; - - if (b8 <= 0) { - r = a >> -b8; - } else if (b8 < 31) { - r = HEDLEY_STATIC_CAST(int32_t, a << b8); - if ((r >> b8) != a) { - r = (a < 0) ? INT32_MIN : INT32_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = (a < 0) ? INT32_MIN : INT32_MAX; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshls_s32 - #define vqshls_s32(a, b) simde_vqshls_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vqshld_s64(int64_t a, int64_t b) { - int64_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vqshld_s64(a, b); - #else - int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); - - if (b8 < -63) - b8 = -63; - - if (b8 <= 0) { - r = a >> -b8; - } else if (b8 < 63) { - r = HEDLEY_STATIC_CAST(int64_t, a << b8); - if ((r >> b8) != a) { - r = (a < 0) ? INT64_MIN : INT64_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = (a < 0) ? 
INT64_MIN : INT64_MAX; - } - #endif - - return r; -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshld_s64 - #define vqshld_s64(a, b) simde_vqshld_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint8_t -simde_vqshlb_u8(uint8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) - return vqshlb_u8(a, HEDLEY_STATIC_CAST(uint8_t, b)); - #elif HEDLEY_HAS_WARNING("-Wsign-conversion") - /* https://github.com/llvm/llvm-project/commit/f0a78bdfdc6d56b25e0081884580b3960a3c2429 */ - HEDLEY_DIAGNOSTIC_PUSH - #pragma clang diagnostic ignored "-Wsign-conversion" - return vqshlb_u8(a, b); - HEDLEY_DIAGNOSTIC_POP - #else - return vqshlb_u8(a, b); - #endif - #else - uint8_t r; - - if (b < -7) - b = -7; - - if (b <= 0) { - r = a >> -b; - } else if (b < 7) { - r = HEDLEY_STATIC_CAST(uint8_t, a << b); - if ((r >> b) != a) { - r = UINT8_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = UINT8_MAX; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshlb_u8 - #define vqshlb_u8(a, b) simde_vqshlb_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vqshlh_u16(uint16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) - return vqshlh_u16(a, HEDLEY_STATIC_CAST(uint16_t, b)); - #elif HEDLEY_HAS_WARNING("-Wsign-conversion") - HEDLEY_DIAGNOSTIC_PUSH - #pragma clang diagnostic ignored "-Wsign-conversion" - return vqshlh_u16(a, b); - HEDLEY_DIAGNOSTIC_POP - #else - return vqshlh_u16(a, b); - #endif - #else - uint16_t r; - - if (b < -15) - b = -15; - - if (b <= 0) { - r = a >> -b; - } else if (b < 15) { - r = HEDLEY_STATIC_CAST(uint16_t, a << b); - if ((r >> b) != a) { - r = UINT16_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = UINT16_MAX; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshlh_u16 - #define vqshlh_u16(a, b) simde_vqshlh_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vqshls_u32(uint32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) - return vqshls_u32(a, HEDLEY_STATIC_CAST(uint16_t, b)); - #elif HEDLEY_HAS_WARNING("-Wsign-conversion") - HEDLEY_DIAGNOSTIC_PUSH - #pragma clang diagnostic ignored "-Wsign-conversion" - return vqshls_u32(a, b); - HEDLEY_DIAGNOSTIC_POP - #else - return vqshls_u32(a, b); - #endif - #else - uint32_t r; - - if (b < -31) - b = -31; - - if (b <= 0) { - r = HEDLEY_STATIC_CAST(uint32_t, a >> -b); - } else if (b < 31) { - r = a << b; - if ((r >> b) != a) { - r = UINT32_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = UINT32_MAX; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshls_u32 - #define vqshls_u32(a, b) simde_vqshls_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vqshld_u64(uint64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) - return vqshld_u64(a, HEDLEY_STATIC_CAST(uint16_t, b)); - #elif HEDLEY_HAS_WARNING("-Wsign-conversion") - HEDLEY_DIAGNOSTIC_PUSH - #pragma clang diagnostic ignored "-Wsign-conversion" - return vqshld_u64(a, b); - HEDLEY_DIAGNOSTIC_POP - #else - return vqshld_u64(a, b); - #endif - #else - uint64_t r; - - if (b < -63) - b = -63; - - if (b <= 0) { - r = a >> -b; - } else if (b < 63) { - 
r = HEDLEY_STATIC_CAST(uint64_t, a << b); - if ((r >> b) != a) { - r = UINT64_MAX; - } - } else if (a == 0) { - r = 0; - } else { - r = UINT64_MAX; - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshld_u64 - #define vqshld_u64(a, b) simde_vqshld_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vqshl_s8 (const simde_int8x8_t a, const simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshl_s8(a, b); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshlb_s8(a_.values[i], b_.values[i]); - } - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_s8 - #define vqshl_s8(a, b) simde_vqshl_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vqshl_s16 (const simde_int16x4_t a, const simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshl_s16(a, b); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshlh_s16(a_.values[i], b_.values[i]); - } - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_s16 - #define vqshl_s16(a, b) simde_vqshl_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vqshl_s32 (const simde_int32x2_t a, const simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshl_s32(a, b); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshls_s32(a_.values[i], b_.values[i]); - } - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_s32 - #define vqshl_s32(a, b) simde_vqshl_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vqshl_s64 (const simde_int64x1_t a, const simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshl_s64(a, b); - #else - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshld_s64(a_.values[i], b_.values[i]); - } - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_s64 - #define vqshl_s64(a, b) simde_vqshl_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vqshl_u8 (const simde_uint8x8_t a, const simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshl_u8(a, b); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a); - simde_int8x8_private - b_ = simde_int8x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshlb_u8(a_.values[i], b_.values[i]); - } - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_u8 - #define vqshl_u8(a, b) 
simde_vqshl_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vqshl_u16 (const simde_uint16x4_t a, const simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshl_u16(a, b); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a); - simde_int16x4_private - b_ = simde_int16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshlh_u16(a_.values[i], b_.values[i]); - } - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_u16 - #define vqshl_u16(a, b) simde_vqshl_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vqshl_u32 (const simde_uint32x2_t a, const simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshl_u32(a, b); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a); - simde_int32x2_private - b_ = simde_int32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshls_u32(a_.values[i], b_.values[i]); - } - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_u32 - #define vqshl_u32(a, b) simde_vqshl_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vqshl_u64 (const simde_uint64x1_t a, const simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshl_u64(a, b); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a); - simde_int64x1_private - b_ = simde_int64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshld_u64(a_.values[i], b_.values[i]); - } - - return simde_uint64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_u64 - #define vqshl_u64(a, b) simde_vqshl_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vqshlq_s8 (const simde_int8x16_t a, const simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshlq_s8(a, b); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshlb_s8(a_.values[i], b_.values[i]); - } - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_s8 - #define vqshlq_s8(a, b) simde_vqshlq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vqshlq_s16 (const simde_int16x8_t a, const simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshlq_s16(a, b); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshlh_s16(a_.values[i], b_.values[i]); - } - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_s16 - #define vqshlq_s16(a, b) simde_vqshlq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqshlq_s32 (const simde_int32x4_t a, const simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return 
vqshlq_s32(a, b); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshls_s32(a_.values[i], b_.values[i]); - } - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_s32 - #define vqshlq_s32(a, b) simde_vqshlq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqshlq_s64 (const simde_int64x2_t a, const simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshlq_s64(a, b); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshld_s64(a_.values[i], b_.values[i]); - } - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_s64 - #define vqshlq_s64(a, b) simde_vqshlq_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vqshlq_u8 (const simde_uint8x16_t a, const simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshlq_u8(a, b); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a); - simde_int8x16_private - b_ = simde_int8x16_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshlb_u8(a_.values[i], b_.values[i]); - } - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_u8 - #define vqshlq_u8(a, b) simde_vqshlq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vqshlq_u16 (const simde_uint16x8_t a, const simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshlq_u16(a, b); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a); - simde_int16x8_private - b_ = simde_int16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshlh_u16(a_.values[i], b_.values[i]); - } - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_u16 - #define vqshlq_u16(a, b) simde_vqshlq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vqshlq_u32 (const simde_uint32x4_t a, const simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshlq_u32(a, b); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a); - simde_int32x4_private - b_ = simde_int32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshls_u32(a_.values[i], b_.values[i]); - } - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_u32 - #define vqshlq_u32(a, b) simde_vqshlq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vqshlq_u64 (const simde_uint64x2_t a, const simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vqshlq_u64(a, b); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a); - simde_int64x2_private - b_ = simde_int64x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 
0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshld_u64(a_.values[i], b_.values[i]); - } - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_u64 - #define vqshlq_u64(a, b) simde_vqshlq_u64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QSHL_H) */ -/* :: End simde/arm/neon/qshl.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qshl_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_QSHL_N_H) -#define SIMDE_ARM_NEON_QSHL_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_vqshlb_n_s8(int8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - return simde_vqshlb_s8(a, HEDLEY_STATIC_CAST(int8_t, n)); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqshlb_n_s8(a, n) vqshlb_n_s8((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshlb_n_s8 - #define vqshlb_n_s8(a, n) simde_vqshlb_n_s8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_vqshlh_n_s16(int16_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { - return simde_vqshlh_s16(a, HEDLEY_STATIC_CAST(int16_t, n)); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqshlh_n_s16(a, n) vqshlh_n_s16((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshlh_n_s16 - #define vqshlh_n_s16(a, n) simde_vqshlh_n_s16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_vqshls_n_s32(int32_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { - return simde_vqshls_s32(a, n); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqshls_n_s32(a, n) vqshls_n_s32((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshls_n_s32 - #define vqshls_n_s32(a, n) simde_vqshls_n_s32((a), (n)) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vqshld_n_s64(int64_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { - return simde_vqshld_s64(a, n); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqshld_n_s64(a, n) vqshld_n_s64((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshld_n_s64 - #define vqshld_n_s64(a, n) simde_vqshld_n_s64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint8_t -simde_vqshlb_n_u8(uint8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - return simde_vqshlb_u8(a, HEDLEY_STATIC_CAST(int8_t, n)); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqshlb_n_u8(a, n) vqshlb_n_u8((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshlb_n_u8 - #define vqshlb_n_u8(a, n) simde_vqshlb_n_u8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint16_t -simde_vqshlh_n_u16(uint16_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { - return simde_vqshlh_u16(a, HEDLEY_STATIC_CAST(int16_t, n)); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqshlh_n_u16(a, n) vqshlh_n_u16((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshlh_n_u16 - #define vqshlh_n_u16(a, n) simde_vqshlh_n_u16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_vqshls_n_u32(uint32_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { - return simde_vqshls_u32(a, n); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqshls_n_u32(a, n) vqshls_n_u32((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshls_n_u32 - #define vqshls_n_u32(a, n) simde_vqshls_n_u32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vqshld_n_u64(uint64_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { - return simde_vqshld_u64(a, n); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vqshld_n_u64(a, n) vqshld_n_u64((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshld_n_u64 - #define vqshld_n_u64(a, n) simde_vqshld_n_u64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vqshl_n_s8 (const simde_int8x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - SIMDE_CONSTIFY_8_(simde_vqshlb_n_s8, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); - } - return simde_int8x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshl_n_s8(a, n) vqshl_n_s8((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_n_s8 - #define vqshl_n_s8(a, n) simde_vqshl_n_s8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vqshl_n_s16 (const simde_int16x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - SIMDE_CONSTIFY_16_(simde_vqshlh_n_s16, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); - } - return simde_int16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshl_n_s16(a, n) vqshl_n_s16((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_n_s16 - #define vqshl_n_s16(a, n) simde_vqshl_n_s16((a), (n)) -#endif - 
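For reference, the vqshl*/vqshl_n* fallbacks deleted in this hunk all come down to the per-lane rule of vqshlb_s8 above: clamp the signed shift count, treat non-positive counts as a right shift, and clamp a left shift to the type limits when it overflows. Below is a minimal standalone C++ sketch of that scalar rule; the helper name is illustrative, and the widening multiply stands in for the upstream "(r >> n) != a" round-trip overflow test — this is not package code.

    // saturating_shift_sketch.cpp -- illustrative sketch only, not package code.
    #include <cstdint>
    #include <cstdio>

    // Per-lane rule behind the deleted vqshlb_s8 fallback: a negative count is a
    // right shift, a positive count is a left shift saturated to INT8_MIN/INT8_MAX.
    static int8_t sat_shl_i8(int8_t a, int8_t n) {
        if (n < -7) n = -7;                       // counts below -7 clamp to -7
        if (n <= 0) return (int8_t)(a >> -n);     // arithmetic right shift, as upstream assumes
        if (n >= 7) return (a == 0) ? (int8_t)0   // everything shifted out
                                    : (int8_t)(a < 0 ? INT8_MIN : INT8_MAX);
        int wide = (int)a * (1 << n);             // widen; replaces the "(r >> n) != a" test
        if (wide > INT8_MAX) return INT8_MAX;
        if (wide < INT8_MIN) return INT8_MIN;
        return (int8_t)wide;
    }

    int main() {
        std::printf("%d %d %d\n", sat_shl_i8(3, 4), sat_shl_i8(100, 2), sat_shl_i8(-64, -3));
        // prints: 48 127 -8
        return 0;
    }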
-SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vqshl_n_s32 (const simde_int32x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshls_s32(a_.values[i], n); - } - return simde_int32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshl_n_s32(a, n) vqshl_n_s32((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_n_s32 - #define vqshl_n_s32(a, n) simde_vqshl_n_s32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vqshl_n_s64 (const simde_int64x1_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshld_s64(a_.values[i], n); - } - return simde_int64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshl_n_s64(a, n) vqshl_n_s64((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_n_s64 - #define vqshl_n_s64(a, n) simde_vqshl_n_s64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vqshl_n_u8 (const simde_uint8x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - SIMDE_CONSTIFY_8_(simde_vqshlb_n_u8, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); - } - return simde_uint8x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshl_n_u8(a, n) vqshl_n_u8((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_n_u8 - #define vqshl_n_u8(a, n) simde_vqshl_n_u8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vqshl_n_u16 (const simde_uint16x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - SIMDE_CONSTIFY_16_(simde_vqshlh_n_u16, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); - } - return simde_uint16x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshl_n_u16(a, n) vqshl_n_u16((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_n_u16 - #define vqshl_n_u16(a, n) simde_vqshl_n_u16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vqshl_n_u32 (const simde_uint32x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshls_u32(a_.values[i], n); - } - return simde_uint32x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshl_n_u32(a, n) vqshl_n_u32((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_n_u32 - #define vqshl_n_u32(a, n) simde_vqshl_n_u32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vqshl_n_u64 (const simde_uint64x1_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { - 
simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshld_u64(a_.values[i], n); - } - return simde_uint64x1_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshl_n_u64(a, n) vqshl_n_u64((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshl_n_u64 - #define vqshl_n_u64(a, n) simde_vqshl_n_u64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vqshlq_n_s8 (const simde_int8x16_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - SIMDE_CONSTIFY_8_(simde_vqshlb_n_s8, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); - } - - return simde_int8x16_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshlq_n_s8(a, n) vqshlq_n_s8((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_n_s8 - #define vqshlq_n_s8(a, n) simde_vqshlq_n_s8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vqshlq_n_s16 (const simde_int16x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - SIMDE_CONSTIFY_16_(simde_vqshlh_n_s16, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); - } - - return simde_int16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshlq_n_s16(a, n) vqshlq_n_s16((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_n_s16 - #define vqshlq_n_s16(a, n) simde_vqshlq_n_s16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqshlq_n_s32 (const simde_int32x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshls_s32(a_.values[i], n); - } - - return simde_int32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshlq_n_s32(a, n) vqshlq_n_s32((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_n_s32 - #define vqshlq_n_s32(a, n) simde_vqshlq_n_s32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqshlq_n_s64 (const simde_int64x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshld_s64(a_.values[i], n); - } - - return simde_int64x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshlq_n_s64(a, n) vqshlq_n_s64((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_n_s64 - #define vqshlq_n_s64(a, n) simde_vqshlq_n_s64((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vqshlq_n_u8 (const simde_uint8x16_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { - SIMDE_CONSTIFY_8_(simde_vqshlb_n_u8, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); - } - - return simde_uint8x16_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshlq_n_u8(a, n) vqshlq_n_u8((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_n_u8 - #define vqshlq_n_u8(a, n) simde_vqshlq_n_u8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vqshlq_n_u16 (const simde_uint16x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - SIMDE_CONSTIFY_16_(simde_vqshlh_n_u16, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); - } - - return simde_uint16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshlq_n_u16(a, n) vqshlq_n_u16((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_n_u16 - #define vqshlq_n_u16(a, n) simde_vqshlq_n_u16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vqshlq_n_u32 (const simde_uint32x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshls_u32(a_.values[i], n); - } - - return simde_uint32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshlq_n_u32(a, n) vqshlq_n_u32((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_n_u32 - #define vqshlq_n_u32(a, n) simde_vqshlq_n_u32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vqshlq_n_u64 (const simde_uint64x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vqshld_u64(a_.values[i], n); - } - - return simde_uint64x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vqshlq_n_u64(a, n) vqshlq_n_u64((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqshlq_n_u64 - #define vqshlq_n_u64(a, n) simde_vqshlq_n_u64((a), (n)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QSHL_N_H) */ -/* :: End simde/arm/neon/qshl_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/qshlu_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
[... removal of the vendored, auto-generated SIMDE NEON polyfill headers continues: simde/arm/neon/qshlu_n.h, qshrn_high_n.h, qshrn_n.h, qshrun_high_n.h, qshrun_n.h, qtbl.h, qtbx.h (third-party SIMDE code and repeated MIT license headers, deleted along with the rest of the bundled SIMDE tree) ...]
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i]; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx4_u8 - #define vqtbx4_u8(a, t, idx) simde_vqtbx4_u8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vqtbx4_s8(simde_int8x8_t a, simde_int8x16x4_t t, simde_uint8x8_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx4_s8(a, t, idx); - #else - simde_uint8x16x4_t t_; - simde_memcpy(&t_, &t, sizeof(t_)); - return simde_vreinterpret_s8_u8(simde_vqtbx4_u8(simde_vreinterpret_u8_s8(a), t_, idx)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx4_s8 - #define vqtbx4_s8(a, t, idx) simde_vqtbx4_s8((a), (t), (idx)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vqtbx1q_u8(simde_uint8x16_t a, simde_uint8x16_t t, simde_uint8x16_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx1q_u8(a, t, idx); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8x2_t split; - simde_memcpy(&split, &t, sizeof(split)); - uint8x8_t lo = vtbx2_u8(vget_low_u8(a), split, vget_low_u8(idx)); - uint8x8_t hi = vtbx2_u8(vget_high_u8(a), split, vget_high_u8(idx)); - return vcombine_u8(lo, hi); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sel(a, - vec_perm(t, t, idx), - vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 16)))); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - t_ = simde_uint8x16_to_private(t), - idx_ = simde_uint8x16_to_private(idx); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - idx_.m128i = _mm_or_si128(idx_.m128i, _mm_cmpgt_epi8(idx_.m128i, _mm_set1_epi8(15))); - r_.m128i = _mm_blendv_epi8(_mm_shuffle_epi8(t_.m128i, idx_.m128i), a_.m128i, idx_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_or(wasm_i8x16_swizzle(t_.v128, idx_.v128), - wasm_v128_and(a_.v128, wasm_u8x16_gt(idx_.v128, wasm_i8x16_splat(15)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (idx_.values[i] < 16) ? 
t_.values[idx_.values[i]] : a_.values[i]; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx1q_u8 - #define vqtbx1q_u8(a, t, idx) simde_vqtbx1q_u8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vqtbx1q_s8(simde_int8x16_t a, simde_int8x16_t t, simde_uint8x16_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx1q_s8(a, t, idx); - #else - return simde_vreinterpretq_s8_u8(simde_vqtbx1q_u8(simde_vreinterpretq_u8_s8(a), simde_vreinterpretq_u8_s8(t), idx)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx1q_s8 - #define vqtbx1q_s8(a, t, idx) simde_vqtbx1q_s8((a), (t), (idx)) -#endif - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vqtbx2q_u8(simde_uint8x16_t a, simde_uint8x16x2_t t, simde_uint8x16_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx2q_u8(a, t, idx); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8x4_t split; - simde_memcpy(&split, &t, sizeof(split)); - uint8x8_t lo = vtbx4_u8(vget_low_u8(a), split, vget_low_u8(idx)); - uint8x8_t hi = vtbx4_u8(vget_high_u8(a), split, vget_high_u8(idx)); - return vcombine_u8(lo, hi); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_sel(a, vec_perm(t.val[0], t.val[1], idx), - vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 32)))); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - t_[2] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]) }, - idx_ = simde_uint8x16_to_private(idx); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - idx_.m128i = _mm_or_si128(idx_.m128i, _mm_cmpgt_epi8(idx_.m128i, _mm_set1_epi8(31))); - __m128i r_0 = _mm_shuffle_epi8(t_[0].m128i, idx_.m128i); - __m128i r_1 = _mm_shuffle_epi8(t_[1].m128i, idx_.m128i); - __m128i r = _mm_blendv_epi8(r_0, r_1, _mm_slli_epi32(idx_.m128i, 3)); - r_.m128i = _mm_blendv_epi8(r, a_.m128i, idx_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_or(wasm_v128_or(wasm_i8x16_swizzle(t_[0].v128, idx_.v128), - wasm_i8x16_swizzle(t_[1].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(16)))), - wasm_v128_and(a_.v128, wasm_u8x16_gt(idx_.v128, wasm_i8x16_splat(31)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (idx_.values[i] < 32) ? 
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i]; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx2q_u8 - #define vqtbx2q_u8(a, t, idx) simde_vqtbx2q_u8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vqtbx2q_s8(simde_int8x16_t a, simde_int8x16x2_t t, simde_uint8x16_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx2q_s8(a, t, idx); - #else - simde_uint8x16x2_t t_; - simde_memcpy(&t_, &t, sizeof(t_)); - return simde_vreinterpretq_s8_u8(simde_vqtbx2q_u8(simde_vreinterpretq_u8_s8(a), t_, idx)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx2q_s8 - #define vqtbx2q_s8(a, t, idx) simde_vqtbx2q_s8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vqtbx3q_u8(simde_uint8x16_t a, simde_uint8x16x3_t t, simde_uint8x16_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx3q_u8(a, t, idx); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t idx_hi = vsubq_u8(idx, vdupq_n_u8(32)); - uint8x8x4_t split_lo; - uint8x8x2_t split_hi; - simde_memcpy(&split_lo, &t.val[0], sizeof(split_lo)); - simde_memcpy(&split_hi, &t.val[2], sizeof(split_hi)); - uint8x8_t hi_lo = vtbx2_u8(vget_low_u8(a), split_hi, vget_low_u8(idx_hi)); - uint8x8_t hi_hi = vtbx2_u8(vget_high_u8(a), split_hi, vget_high_u8(idx_hi)); - uint8x8_t lo_lo = vtbx4_u8(hi_lo, split_lo, vget_low_u8(idx)); - uint8x8_t lo_hi = vtbx4_u8(hi_hi, split_lo, vget_high_u8(idx)); - return vcombine_u8(lo_lo, lo_hi); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_01 = vec_perm(t.val[0], t.val[1], idx); - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_2 = vec_perm(t.val[2], t.val[2], idx); - return vec_sel(a, - vec_sel(r_01, r_2, vec_cmpgt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 31)))), - vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 48)))); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - t_[3] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]) }, - idx_ = simde_uint8x16_to_private(idx); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - idx_.m128i = _mm_or_si128(idx_.m128i, _mm_cmpgt_epi8(idx_.m128i, _mm_set1_epi8(47))); - __m128i r_0 = _mm_shuffle_epi8(t_[0].m128i, idx_.m128i); - __m128i r_1 = _mm_shuffle_epi8(t_[1].m128i, idx_.m128i); - __m128i r_01 = _mm_blendv_epi8(r_0, r_1, _mm_slli_epi32(idx_.m128i, 3)); - __m128i r_2 = _mm_shuffle_epi8(t_[2].m128i, idx_.m128i); - __m128i r = _mm_blendv_epi8(r_01, r_2, _mm_slli_epi32(idx_.m128i, 2)); - r_.m128i = _mm_blendv_epi8(r, a_.m128i, idx_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_or(wasm_v128_or(wasm_i8x16_swizzle(t_[0].v128, idx_.v128), - wasm_i8x16_swizzle(t_[1].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(16)))), - wasm_v128_or(wasm_i8x16_swizzle(t_[2].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(32))) , - wasm_v128_and(a_.v128, wasm_u8x16_gt(idx_.v128, wasm_i8x16_splat(47))))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (idx_.values[i] < 48) ? 
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i]; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx3q_u8 - #define vqtbx3q_u8(a, t, idx) simde_vqtbx3q_u8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vqtbx3q_s8(simde_int8x16_t a, simde_int8x16x3_t t, simde_uint8x16_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx3q_s8(a, t, idx); - #else - simde_uint8x16x3_t t_; - simde_memcpy(&t_, &t, sizeof(t_)); - return simde_vreinterpretq_s8_u8(simde_vqtbx3q_u8(simde_vreinterpretq_u8_s8(a), t_, idx)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx3q_s8 - #define vqtbx3q_s8(a, t, idx) simde_vqtbx3q_s8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vqtbx4q_u8(simde_uint8x16_t a, simde_uint8x16x4_t t, simde_uint8x16_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx4q_u8(a, t, idx); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t idx_hi = vsubq_u8(idx, vdupq_n_u8(32)); - uint8x8x4_t split_lo; - uint8x8x4_t split_hi; - simde_memcpy(&split_lo, &t.val[0], sizeof(split_lo)); - simde_memcpy(&split_hi, &t.val[2], sizeof(split_hi)); - uint8x8_t lo_lo = vtbx4_u8(vget_low_u8(a), split_lo, vget_low_u8(idx)); - uint8x8_t lo_hi = vtbx4_u8(vget_high_u8(a), split_lo, vget_high_u8(idx)); - uint8x8_t lo = vtbx4_u8(lo_lo, split_hi, vget_low_u8(idx_hi)); - uint8x8_t hi = vtbx4_u8(lo_hi, split_hi, vget_high_u8(idx_hi)); - return vcombine_u8(lo, hi); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_01 = vec_perm(t.val[0], t.val[1], idx); - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_23 = vec_perm(t.val[2], t.val[3], idx); - return vec_sel(a, - vec_sel(r_01, r_23, vec_cmpgt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 31)))), - vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 64)))); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - t_[4] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]), simde_uint8x16_to_private(t.val[3]) }, - idx_ = simde_uint8x16_to_private(idx); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - idx_.m128i = _mm_or_si128(idx_.m128i, _mm_cmpgt_epi8(idx_.m128i, _mm_set1_epi8(63))); - __m128i idx_shl3 = _mm_slli_epi32(idx_.m128i, 3); - __m128i r_0 = _mm_shuffle_epi8(t_[0].m128i, idx_.m128i); - __m128i r_1 = _mm_shuffle_epi8(t_[1].m128i, idx_.m128i); - __m128i r_01 = _mm_blendv_epi8(r_0, r_1, idx_shl3); - __m128i r_2 = _mm_shuffle_epi8(t_[2].m128i, idx_.m128i); - __m128i r_3 = _mm_shuffle_epi8(t_[3].m128i, idx_.m128i); - __m128i r_23 = _mm_blendv_epi8(r_2, r_3, idx_shl3); - __m128i r = _mm_blendv_epi8(r_01, r_23, _mm_slli_epi32(idx_.m128i, 2)); - r_.m128i = _mm_blendv_epi8(r, a_.m128i, idx_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_v128_or(wasm_v128_or(wasm_v128_or(wasm_i8x16_swizzle(t_[0].v128, idx_.v128), - wasm_i8x16_swizzle(t_[1].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(16)))), - wasm_v128_or(wasm_i8x16_swizzle(t_[2].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(32))), - wasm_i8x16_swizzle(t_[3].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(48))))), - wasm_v128_and(a_.v128, wasm_u8x16_gt(idx_.v128, wasm_i8x16_splat(63)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = 
(idx_.values[i] < 64) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i]; - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx4q_u8 - #define vqtbx4q_u8(a, t, idx) simde_vqtbx4q_u8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vqtbx4q_s8(simde_int8x16_t a, simde_int8x16x4_t t, simde_uint8x16_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx4q_s8(a, t, idx); - #else - simde_uint8x16x4_t t_; - simde_memcpy(&t_, &t, sizeof(t_)); - return simde_vreinterpretq_s8_u8(simde_vqtbx4q_u8(simde_vreinterpretq_u8_s8(a), t_, idx)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx4q_s8 - #define vqtbx4q_s8(a, t, idx) simde_vqtbx4q_s8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vqtbx1_p8(simde_poly8x8_t a, simde_poly8x16_t t, simde_uint8x8_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx1_p8(a, t, idx); - #else - return simde_vreinterpret_p8_u8(simde_vqtbx1_u8(simde_vreinterpret_u8_p8(a), simde_vreinterpretq_u8_p8(t), idx)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx1_p8 - #define vqtbx1_p8(a, t, idx) simde_vqtbx1_p8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vqtbx1q_p8(simde_poly8x16_t a, simde_poly8x16_t t, simde_uint8x16_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx1q_p8(a, t, idx); - #else - return simde_vreinterpretq_p8_u8(simde_vqtbx1q_u8(simde_vreinterpretq_u8_p8(a), simde_vreinterpretq_u8_p8(t), idx)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx1q_p8 - #define vqtbx1q_p8(a, t, idx) simde_vqtbx1q_p8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vqtbx2_p8(simde_poly8x8_t a, simde_poly8x16x2_t t, simde_uint8x8_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx2_p8(a, t, idx); - #else - simde_uint8x16x2_t t_; - simde_memcpy(&t_, &t, sizeof(t_)); - return simde_vreinterpret_p8_u8(simde_vqtbx2_u8(simde_vreinterpret_u8_p8(a), t_, idx)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx2_p8 - #define vqtbx2_p8(a, t, idx) simde_vqtbx2_p8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vqtbx2q_p8(simde_poly8x16_t a, simde_poly8x16x2_t t, simde_uint8x16_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx2q_p8(a, t, idx); - #else - simde_uint8x16x2_t t_; - simde_memcpy(&t_, &t, sizeof(t_)); - return simde_vreinterpretq_p8_u8(simde_vqtbx2q_u8(simde_vreinterpretq_u8_p8(a), t_, idx)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx2q_p8 - #define vqtbx2q_p8(a, t, idx) simde_vqtbx2q_p8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vqtbx3_p8(simde_poly8x8_t a, simde_poly8x16x3_t t, simde_uint8x8_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx3_p8(a, t, idx); - #else - simde_uint8x16x3_t t_; - simde_memcpy(&t_, &t, sizeof(t_)); - return simde_vreinterpret_p8_u8(simde_vqtbx3_u8(simde_vreinterpret_u8_p8(a), t_, idx)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx3_p8 - #define vqtbx3_p8(a, t, idx) simde_vqtbx3_p8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vqtbx3q_p8(simde_poly8x16_t a, simde_poly8x16x3_t t, simde_uint8x16_t idx) { - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx3q_p8(a, t, idx); - #else - simde_uint8x16x3_t t_; - simde_memcpy(&t_, &t, sizeof(t_)); - return simde_vreinterpretq_p8_u8(simde_vqtbx3q_u8(simde_vreinterpretq_u8_p8(a), t_, idx)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx3q_p8 - #define vqtbx3q_p8(a, t, idx) simde_vqtbx3q_p8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vqtbx4_p8(simde_poly8x8_t a, simde_poly8x16x4_t t, simde_uint8x8_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx4_p8(a, t, idx); - #else - simde_uint8x16x4_t t_; - simde_memcpy(&t_, &t, sizeof(t_)); - return simde_vreinterpret_p8_u8(simde_vqtbx4_u8(simde_vreinterpret_u8_p8(a), t_, idx)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx4_p8 - #define vqtbx4_p8(a, t, idx) simde_vqtbx4_p8((a), (t), (idx)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vqtbx4q_p8(simde_poly8x16_t a, simde_poly8x16x4_t t, simde_uint8x16_t idx) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqtbx4q_p8(a, t, idx); - #else - simde_uint8x16x4_t t_; - simde_memcpy(&t_, &t, sizeof(t_)); - return simde_vreinterpretq_p8_u8(simde_vqtbx4q_u8(simde_vreinterpretq_u8_p8(a), t_, idx)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqtbx4q_p8 - #define vqtbx4q_p8(a, t, idx) simde_vqtbx4q_p8((a), (t), (idx)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_QTBX_H) */ -/* :: End simde/arm/neon/qtbx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/raddhn.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RADDHN_H) -#define SIMDE_ARM_NEON_RADDHN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vraddhn_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vraddhn_s16(a, b); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - int16_t round_cast = 1 << 7; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i] + round_cast; - } - return simde_vmovn_s16(simde_vshrq_n_s16(simde_int16x8_from_private(r_), 8)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vraddhn_s16 - #define vraddhn_s16(a, b) simde_vraddhn_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vraddhn_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vraddhn_s32(a, b); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - int round_cast = 1 << 15; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] + b_.values[i] + round_cast; - } - return simde_vmovn_s32(simde_vshrq_n_s32(simde_int32x4_from_private(r_), 16)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vraddhn_s32 - #define vraddhn_s32(a, b) simde_vraddhn_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vraddhn_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vraddhn_s64(a, b); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - int64_t round_cast = 1ll << 31; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] + b_.values[i] + round_cast) >> 32); - } - return simde_vmovn_s64(simde_int64x2_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vraddhn_s64 - #define vraddhn_s64(a, b) simde_vraddhn_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vraddhn_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vraddhn_u16(a, b); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - uint16_t round_cast = 1 << 7; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i] + b_.values[i] + round_cast); - } - return simde_vmovn_u16(simde_vshrq_n_u16(simde_uint16x8_from_private(r_), 8)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vraddhn_u16 - #define vraddhn_u16(a, b) simde_vraddhn_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t 
-simde_vraddhn_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vraddhn_u32(a, b); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - uint32_t round_cast = 1 << 15; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i] + b_.values[i] + round_cast); - } - return simde_vmovn_u32(simde_vshrq_n_u32(simde_uint32x4_from_private(r_), 16)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vraddhn_u32 - #define vraddhn_u32(a, b) simde_vraddhn_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vraddhn_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vraddhn_u64(a, b); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b); - uint64_t round_cast = 1ull << 31; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((a_.values[i] + b_.values[i] + round_cast) >> 32); - } - return simde_vmovn_u64(simde_uint64x2_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vraddhn_u64 - #define vraddhn_u64(a, b) simde_vraddhn_u64((a), (b)) -#endif - - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RADDHN_H) */ -/* :: End simde/arm/neon/raddhn.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/raddhn_high.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RADDHN_HIGH_H) -#define SIMDE_ARM_NEON_RADDHN_HIGH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vraddhn_high_s16(r, a, b) vraddhn_high_s16((r), (a), (b)) -#else - #define simde_vraddhn_high_s16(r, a, b) simde_vcombine_s8(r, simde_vraddhn_s16(a, b)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vraddhn_high_s16 - #define vraddhn_high_s16(r, a, b) simde_vraddhn_high_s16((r), (a), (b)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vraddhn_high_s32(r, a, b) vraddhn_high_s32((r), (a), (b)) -#else - #define simde_vraddhn_high_s32(r, a, b) simde_vcombine_s16(r, simde_vraddhn_s32(a, b)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vraddhn_high_s32 - #define vraddhn_high_s32(r, a, b) simde_vraddhn_high_s32((r), (a), (b)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vraddhn_high_s64(r, a, b) vraddhn_high_s64((r), (a), (b)) -#else - #define simde_vraddhn_high_s64(r, a, b) simde_vcombine_s32(r, simde_vraddhn_s64(a, b)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vraddhn_high_s64 - #define vraddhn_high_s64(r, a, b) simde_vraddhn_high_s64((r), (a), (b)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vraddhn_high_u16(r, a, b) vraddhn_high_u16((r), (a), (b)) -#else - #define simde_vraddhn_high_u16(r, a, b) simde_vcombine_u8(r, simde_vraddhn_u16(a, b)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vraddhn_high_u16 - #define vraddhn_high_u16(r, a, b) simde_vraddhn_high_u16((r), (a), (b)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vraddhn_high_u32(r, a, b) vraddhn_high_u32((r), (a), (b)) -#else - #define simde_vraddhn_high_u32(r, a, b) simde_vcombine_u16(r, simde_vraddhn_u32(a, b)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vraddhn_high_u32 - #define vraddhn_high_u32(r, a, b) simde_vraddhn_high_u32((r), (a), (b)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vraddhn_high_u64(r, a, b) vraddhn_high_u64((r), (a), (b)) -#else - #define simde_vraddhn_high_u64(r, a, b) simde_vcombine_u32(r, simde_vraddhn_u64(a, b)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vraddhn_high_u64 - #define vraddhn_high_u64(r, a, b) simde_vraddhn_high_u64((r), (a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RADDHN_HIGH_H) */ -/* :: End simde/arm/neon/raddhn_high.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rax.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to 
permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RAX_H) -#define SIMDE_ARM_NEON_RAX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vrax1q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) - return vrax1q_u64(a, b); - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a), - b_ = simde_uint64x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = (b_.values[i] >> 63) | (b_.values[i] << 1); - r_.values[i] = a_.values[i] ^ b_.values[i]; - } - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrax1q_u64 - #define vrax1q_u64(a, b) simde_vrax1q_u64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RAX_H) */ -/* :: End simde/arm/neon/rax.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rbit.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -/* The GFNI implementation is based on Wojciech Muła's work at - * http://0x80.pl/articles/avx512-galois-field-for-bit-shuffling.html#bit-shuffling via - * https://github.com/InstLatx64/InstLatX64_Demo/blob/49c27effdfd5a45f27e0ccb6e2f3be5f27c3845d/GFNI_Demo.h#L173 */ - -#if !defined(SIMDE_ARM_NEON_RBIT_H) -#define SIMDE_ARM_NEON_RBIT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vrbit_u8(simde_uint8x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrbit_u8(a); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a); - - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_X86_GFNI_NATIVE) - __m128i tmp = _mm_movpi64_epi64(a_.m64); - tmp = _mm_gf2p8affine_epi64_epi8(tmp, _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(0x8040201008040201))), 0); - r_.m64 = _mm_movepi64_pi64(tmp); - #elif defined(SIMDE_X86_MMX_NATIVE) - __m64 mask; - mask = _mm_set1_pi8(0x55); - a_.m64 = _mm_or_si64(_mm_andnot_si64(mask, _mm_slli_pi16(a_.m64, 1)), _mm_and_si64(mask, _mm_srli_pi16(a_.m64, 1))); - mask = _mm_set1_pi8(0x33); - a_.m64 = _mm_or_si64(_mm_andnot_si64(mask, _mm_slli_pi16(a_.m64, 2)), _mm_and_si64(mask, _mm_srli_pi16(a_.m64, 2))); - mask = _mm_set1_pi8(0x0F); - r_.m64 = _mm_or_si64(_mm_andnot_si64(mask, _mm_slli_pi16(a_.m64, 4)), _mm_and_si64(mask, _mm_srli_pi16(a_.m64, 4))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if HEDLEY_HAS_BUILTIN(__builtin_bitreverse8) && !defined(HEDLEY_IBM_VERSION) - r_.values[i] = __builtin_bitreverse8(a_.values[i]); - #else - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (((a_.values[i] * UINT64_C(0x80200802)) & UINT64_C(0x0884422110)) * UINT64_C(0x0101010101)) >> 32); - #endif - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrbit_u8 - #define vrbit_u8(a) simde_vrbit_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vrbit_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrbit_s8(a); - #else - return simde_vreinterpret_s8_u8(simde_vrbit_u8(simde_vreinterpret_u8_s8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrbit_s8 - #define vrbit_s8(a) simde_vrbit_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vrbitq_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrbitq_u8(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) shift; - shift = vec_splat_u8(1); - a = vec_sel(vec_sl(a, shift), vec_sr(a, shift), vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x55))); - shift = vec_splat_u8(2); - a = vec_sel(vec_sl(a, shift), vec_sr(a, shift), vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x33))); - shift = vec_splat_u8(4); - return vec_or(vec_sl(a, shift), vec_sr(a, shift)); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a); - - #if defined(SIMDE_X86_GFNI_NATIVE) - r_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(0x8040201008040201))), 0); - #elif 
defined(SIMDE_X86_SSE2_NATIVE) - __m128i mask; - mask = _mm_set1_epi8(0x55); - a_.m128i = _mm_or_si128(_mm_andnot_si128(mask, _mm_slli_epi16(a_.m128i, 1)), _mm_and_si128(mask, _mm_srli_epi16(a_.m128i, 1))); - mask = _mm_set1_epi8(0x33); - a_.m128i = _mm_or_si128(_mm_andnot_si128(mask, _mm_slli_epi16(a_.m128i, 2)), _mm_and_si128(mask, _mm_srli_epi16(a_.m128i, 2))); - mask = _mm_set1_epi8(0x0F); - r_.m128i = _mm_or_si128(_mm_andnot_si128(mask, _mm_slli_epi16(a_.m128i, 4)), _mm_and_si128(mask, _mm_srli_epi16(a_.m128i, 4))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - a_.v128 = wasm_v128_bitselect(wasm_u8x16_shr(a_.v128, 1), wasm_i8x16_shl(a_.v128, 1), wasm_i8x16_splat(0x55)); - a_.v128 = wasm_v128_bitselect(wasm_u8x16_shr(a_.v128, 2), wasm_i8x16_shl(a_.v128, 2), wasm_i8x16_splat(0x33)); - r_.v128 = wasm_v128_or(wasm_u8x16_shr(a_.v128, 4), wasm_i8x16_shl(a_.v128, 4)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if HEDLEY_HAS_BUILTIN(__builtin_bitreverse8) && !defined(HEDLEY_IBM_VERSION) - r_.values[i] = __builtin_bitreverse8(a_.values[i]); - #else - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (((a_.values[i] * UINT64_C(0x80200802)) & UINT64_C(0x0884422110)) * UINT64_C(0x0101010101)) >> 32); - #endif - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrbitq_u8 - #define vrbitq_u8(a) simde_vrbitq_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vrbitq_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrbitq_s8(a); - #else - return simde_vreinterpretq_s8_u8(simde_vrbitq_u8(simde_vreinterpretq_u8_s8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrbitq_s8 - #define vrbitq_s8(a) simde_vrbitq_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vrbit_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrbit_p8(a); - #else - return simde_vreinterpret_p8_u8(simde_vrbit_u8(simde_vreinterpret_u8_p8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrbit_p8 - #define vrbit_p8(a) simde_vrbit_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vrbitq_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrbitq_p8(a); - #else - return simde_vreinterpretq_p8_u8(simde_vrbitq_u8(simde_vreinterpretq_u8_p8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrbitq_p8 - #define vrbitq_p8(a) simde_vrbitq_p8(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RBIT_H) */ -/* :: End simde/arm/neon/rbit.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/recpe.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RECPE_H) -#define SIMDE_ARM_NEON_RECPE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vrecpeh_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrecpeh_f16(a); - #else - simde_float32_t r_; - simde_float32_t a_ = simde_float16_to_float32(a); - r_ = 1.0f / a_; - return simde_float16_from_float32(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrecpeh_f16 - #define vrecpeh_f16(a) simde_vrecpeh_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vrecpes_f32(simde_float32_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrecpes_f32(a); - #else - return SIMDE_FLOAT32_C(1.0) / a; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrecpes_f32 - #define vrecpes_f32(a) simde_vrecpes_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vrecped_f64(simde_float64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrecped_f64(a); - #else - return SIMDE_FLOAT64_C(1.0) / a; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrecped_f64 - #define vrecped_f64(a) simde_vrecped_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vrecpe_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrecpe_f16(a); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrecpeh_f16(a_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrecpe_f16 - #define vrecpe_f16(a) simde_vrecpe_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrecpe_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrecpe_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - #if defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.values[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.values[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < 
(sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.values[i] = simde_vrecpes_f32(a_.values[i]); - } - #endif - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrecpe_f32 - #define vrecpe_f32(a) simde_vrecpe_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrecpe_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrecpe_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = 1.0 / a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrecped_f64(a_.values[i]); - } - #endif - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrecpe_f64 - #define vrecpe_f64(a) simde_vrecpe_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vrecpeq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrecpeq_f64(a); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = 1.0 / a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrecped_f64(a_.values[i]); - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrecpeq_f64 - #define vrecpeq_f64(a) simde_vrecpeq_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrecpeq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrecpeq_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_re(a); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - - #if defined(SIMDE_X86_SSE_NATIVE) - r_.m128 = _mm_rcp_ps(a_.m128); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.values[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.values[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.values[i] = simde_vrecpes_f32(a_.values[i]); - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrecpeq_f32 - #define vrecpeq_f32(a) simde_vrecpeq_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vrecpeq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrecpeq_f16(a); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrecpeh_f16(a_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrecpeq_f16 - #define vrecpeq_f16(a) 
simde_vrecpeq_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vrecpe_u32(simde_uint32x2_t a){ - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrecpe_u32(a); - #else - simde_uint32x2_private - a_ = simde_uint32x2_to_private(a), - r_; - - SIMDE_VECTORIZE - for(size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (a_.values[i] <= 0x7FFFFFFF){ - r_.values[i] = UINT32_MAX; - } else { - uint32_t a_temp = (a_.values[i] >> 23) & 511; - a_temp = a_temp * 2 + 1; - uint32_t b = (1 << 19) / a_temp; - r_.values[i] = (b+1) / 2; - r_.values[i] = r_.values[i] << 23; - } - } - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrecpe_u32 - #define vrecpe_u32(a) simde_vrecpe_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vrecpeq_u32(simde_uint32x4_t a){ - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrecpeq_u32(a); - #else - simde_uint32x4_private - a_ = simde_uint32x4_to_private(a), - r_; - - SIMDE_VECTORIZE - for(size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (a_.values[i] <= 0x7FFFFFFF){ - r_.values[i] = UINT32_MAX; - } else { - uint32_t a_temp = (a_.values[i] >> 23) & 511; - a_temp = a_temp * 2 + 1; - uint32_t b = (1 << 19) / a_temp; - r_.values[i] = (b+1) / 2; - r_.values[i] = r_.values[i] << 23; - } - } - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrecpeq_u32 - #define vrecpeq_u32(a) simde_vrecpeq_u32((a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_RECPE_H) */ -/* :: End simde/arm/neon/recpe.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/recps.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RECPS_H) -#define SIMDE_ARM_NEON_RECPS_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vrecpsh_f16(simde_float16_t a, simde_float16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrecpsh_f16(a, b); - #else - return simde_float16_from_float32(SIMDE_FLOAT32_C(2.0) - - simde_float16_to_float32(a) * simde_float16_to_float32(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrecpsh_f16 - #define vrecpsh_f16(a, b) simde_vrecpsh_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vrecpss_f32(simde_float32_t a, simde_float32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrecpss_f32(a, b); - #else - return SIMDE_FLOAT32_C(2.0) - (a * b); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrecpss_f32 - #define vrecpss_f32(a, b) simde_vrecpss_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vrecpsd_f64(simde_float64_t a, simde_float64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrecpsd_f64(a, b); - #else - return SIMDE_FLOAT64_C(2.0) - (a * b); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrecpsd_f64 - #define vrecpsd_f64(a, b) simde_vrecpsd_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrecps_f64(simde_float64x1_t a, simde_float64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrecps_f64(a, b); - #else - return simde_vmls_f64(simde_vdup_n_f64(SIMDE_FLOAT64_C(2.0)), a, b); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrecps_f64 - #define vrecps_f64(a, b) simde_vrecps_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vrecps_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrecps_f16(a, b); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrecpsh_f16(a_.values[i], b_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrecps_f16 - #define vrecps_f16(a, b) simde_vrecps_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrecps_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrecps_f32(a, b); - #else - return simde_vmls_f32(simde_vdup_n_f32(SIMDE_FLOAT32_C(2.0)), a, b); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrecps_f32 - #define vrecps_f32(a, b) simde_vrecps_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vrecpsq_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrecpsq_f64(a, b); - #else - return 
simde_vmlsq_f64(simde_vdupq_n_f64(SIMDE_FLOAT64_C(2.0)), a, b); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrecpsq_f64 - #define vrecpsq_f64(a, b) simde_vrecpsq_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrecpsq_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrecpsq_f32(a, b); - #else - return simde_vmlsq_f32(simde_vdupq_n_f32(SIMDE_FLOAT32_C(2.0)), a, b); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrecpsq_f32 - #define vrecpsq_f32(a, b) simde_vrecpsq_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vrecpsq_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrecpsq_f16(a, b); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrecpsh_f16(a_.values[i], b_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrecpsq_f16 - #define vrecpsq_f16(a, b) simde_vrecpsq_f16((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_RECPS_H) */ -/* :: End simde/arm/neon/recps.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/recpx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RECPX_H) -#define SIMDE_ARM_NEON_RECPX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vrecpxh_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrecpxh_f16(a); - #else - if (simde_isnanhf(a)) { - return SIMDE_NANHF; - } - uint16_t n; - simde_memcpy(&n, &a, sizeof(a)); - uint16_t sign = n & 0x8000; - uint16_t exp = n & 0x7c00; - uint16_t result; - if (exp == 0) { - uint16_t max_exp = 0x7b00; - result = sign|max_exp; - } - else { - exp = ~(exp) & 0x7c00; - result = sign|exp; - } - simde_memcpy(&a, &result, sizeof(result)); - return a; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrecpxh_f16 - #define vrecpxh_f16(a) simde_vrecpxh_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vrecpxs_f32(simde_float32_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrecpxs_f32(a); - #else - if (simde_math_isnanf(a)) { - return SIMDE_MATH_NANF; - } - uint32_t n; - simde_memcpy(&n, &a, sizeof(a)); - uint32_t sign = n & 0x80000000; - uint32_t exp = n & 0x7f800000; - uint32_t result; - if (exp == 0) { - uint32_t max_exp = 0x7f000000; - result = sign|max_exp; - } - else { - exp = ~(exp) & 0x7f800000; - result = sign|exp; - } - simde_memcpy(&a, &result, sizeof(result)); - return a; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrecpxs_f32 - #define vrecpxs_f32(a) simde_vrecpxs_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vrecpxd_f64(simde_float64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrecpxd_f64(a); - #else - if (simde_math_isnan(a)) { - return SIMDE_MATH_NAN; - } - uint64_t n; - simde_memcpy(&n, &a, sizeof(a)); - uint64_t sign = n & 0x8000000000000000ull; - uint64_t exp = n & 0x7ff0000000000000ull; - uint64_t result; - if (exp == 0) { - uint64_t max_exp = 0x7fe0000000000000ull; - result = sign|max_exp; - } - else { - exp = ~(exp) & 0x7ff0000000000000ull; - result = sign|exp; - } - simde_memcpy(&a, &result, sizeof(result)); - return a; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrecpxd_f64 - #define vrecpxd_f64(a) simde_vrecpxd_f64((a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_RECPX_H) */ -/* :: End simde/arm/neon/recpx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rev16.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial 
portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_REV16_H) -#define SIMDE_ARM_NEON_REV16_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vrev16_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev16_s8(a); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_shuffle_pi8(a_.m64, _mm_set_pi8(6, 7, 4, 5, 2, 3, 0, 1)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, a_.values, 1, 0, 3, 2, 5, 4, 7, 6); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i ^ 1]; - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev16_s8 - #define vrev16_s8(a) simde_vrev16_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vrev16_u8(simde_uint8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev16_u8(a); - #else - return simde_vreinterpret_u8_s8(simde_vrev16_s8(simde_vreinterpret_s8_u8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev16_u8 - #define vrev16_u8(a) simde_vrev16_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vrev16q_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev16q_s8(a); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), - vec_revb(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), a))); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), - vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_reve(a)))); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i = _mm_shuffle_epi8(a_.m128i, _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_shuffle(a_.v128, a_.v128, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i ^ 1]; - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} 
-#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev16q_s8 - #define vrev16q_s8(a) simde_vrev16q_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vrev16q_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev16q_u8(a); - #else - return simde_vreinterpretq_u8_s8(simde_vrev16q_s8(simde_vreinterpretq_s8_u8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev16q_u8 - #define vrev16q_u8(a) simde_vrev16q_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vrev16_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev16_p8(a); - #else - return simde_vreinterpret_p8_s8(simde_vrev16_s8(simde_vreinterpret_s8_p8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev16_p8 - #define vrev16_p8(a) simde_vrev16_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vrev16q_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev16q_p8(a); - #else - return simde_vreinterpretq_p8_s8(simde_vrev16q_s8(simde_vreinterpretq_s8_p8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev16q_p8 - #define vrev16q_p8(a) simde_vrev16q_p8(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_REV16_H) */ -/* :: End simde/arm/neon/rev16.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rev32.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_REV32_H) -#define SIMDE_ARM_NEON_REV32_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vrev32_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev32_s8(a); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_shuffle_pi8(a_.m64, _mm_set_pi8(4, 5, 6, 7, 0, 1, 2, 3)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, a_.values, 3, 2, 1, 0, 7, 6, 5, 4); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i ^ 3]; - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev32_s8 - #define vrev32_s8(a) simde_vrev32_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vrev32_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev32_s16(a); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a); - - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_shuffle_pi16(a_.m64, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 0, 3, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i ^ 1]; - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev32_s16 - #define vrev32_s16(a) simde_vrev32_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vrev32_u8(simde_uint8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev32_u8(a); - #else - return simde_vreinterpret_u8_s8(simde_vrev32_s8(simde_vreinterpret_s8_u8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev32_u8 - #define vrev32_u8(a) simde_vrev32_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vrev32_u16(simde_uint16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev32_u16(a); - #else - return simde_vreinterpret_u16_s16(simde_vrev32_s16(simde_vreinterpret_s16_u16(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev32_u16 - #define vrev32_u16(a) simde_vrev32_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vrev32q_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev32q_s8(a); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), - vec_revb(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), a))); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), - vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_reve(a)))); - #else - simde_int8x16_private - r_, - a_ = 
simde_int8x16_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i = _mm_shuffle_epi8(a_.m128i, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, - 4, 5, 6, 7, 0, 1, 2, 3)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_shuffle(a_.v128, a_.v128, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i ^ 3]; - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev32q_s8 - #define vrev32q_s8(a) simde_vrev32q_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vrev32q_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev32q_s16(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), - vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_reve(a)))); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i = _mm_shuffle_epi8(a_.m128i, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, - 5, 4, 7, 6, 1, 0, 3, 2)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_shufflehi_epi16(_mm_shufflelo_epi16(a_.m128i, - (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)), - (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_shuffle(a_.v128, a_.v128, 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 1, 0, 3, 2, 5, 4, 7, 6); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i ^ 1]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev32q_s16 - #define vrev32q_s16(a) simde_vrev32q_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vrev32q_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev32q_u8(a); - #else - return simde_vreinterpretq_u8_s8(simde_vrev32q_s8(simde_vreinterpretq_s8_u8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev32q_u8 - #define vrev32q_u8(a) simde_vrev32q_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vrev32q_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev32q_u16(a); - #else - return simde_vreinterpretq_u16_s16(simde_vrev32q_s16(simde_vreinterpretq_s16_u16(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev32q_u16 - #define vrev32q_u16(a) simde_vrev32q_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vrev32_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev32_p8(a); - #else - return simde_vreinterpret_p8_s8(simde_vrev32_s8(simde_vreinterpret_s8_p8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev32_p8 - #define vrev32_p8(a) simde_vrev32_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vrev32_p16(simde_poly16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev32_p16(a); - #else - 
return simde_vreinterpret_p16_s16(simde_vrev32_s16(simde_vreinterpret_s16_p16(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev32_p16 - #define vrev32_p16(a) simde_vrev32_p16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vrev32q_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev32q_p8(a); - #else - return simde_vreinterpretq_p8_s8(simde_vrev32q_s8(simde_vreinterpretq_s8_p8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev32q_p8 - #define vrev32q_p8(a) simde_vrev32q_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vrev32q_p16(simde_poly16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev32q_p16(a); - #else - return simde_vreinterpretq_p16_s16(simde_vrev32q_s16(simde_vreinterpretq_s16_p16(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev32q_p16 - #define vrev32q_p16(a) simde_vrev32q_p16(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_REV32_H) */ -/* :: End simde/arm/neon/rev32.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rev64.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_REV64_H) -#define SIMDE_ARM_NEON_REV64_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vrev64_s8(simde_int8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64_s8(a); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_shuffle_pi8(a_.m64, _mm_set_pi8(0, 1, 2, 3, 4, 5, 6, 7)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, a_.values, 7, 6, 5, 4, 3, 2, 1, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i ^ 7]; - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64_s8 - #define vrev64_s8(a) simde_vrev64_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vrev64_s16(simde_int16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64_s16(a); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a); - - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_shuffle_pi16(a_.m64, (0 << 6) | (1 << 4) | (2 << 2) | (3 << 0)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 3, 2, 1, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i ^ 3]; - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64_s16 - #define vrev64_s16(a) simde_vrev64_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vrev64_s32(simde_int32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64_s32(a); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a); - - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_shuffle_pi16(a_.m64, (1 << 6) | (0 << 4) | (3 << 2) | (2 << 0)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) - r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i ^ 1]; - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64_s32 - #define vrev64_s32(a) simde_vrev64_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vrev64_u8(simde_uint8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64_u8(a); - #else - return simde_vreinterpret_u8_s8(simde_vrev64_s8(simde_vreinterpret_s8_u8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64_u8 - #define vrev64_u8(a) simde_vrev64_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vrev64_u16(simde_uint16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return 
vrev64_u16(a); - #else - return simde_vreinterpret_u16_s16(simde_vrev64_s16(simde_vreinterpret_s16_u16(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64_u16 - #define vrev64_u16(a) simde_vrev64_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vrev64_u32(simde_uint32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64_u32(a); - #else - return simde_vreinterpret_u32_s32(simde_vrev64_s32(simde_vreinterpret_s32_u32(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64_u32 - #define vrev64_u32(a) simde_vrev64_u32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vrev64_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrev64_f16(a); - #else - return simde_vreinterpret_f16_s16(simde_vrev64_s16(simde_vreinterpret_s16_f16(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64_f16 - #define vrev64_f16(a) simde_vrev64_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrev64_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64_f32(a); - #else - return simde_vreinterpret_f32_s32(simde_vrev64_s32(simde_vreinterpret_s32_f32(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64_f32 - #define vrev64_f32(a) simde_vrev64_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vrev64q_s8(simde_int8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64q_s8(a); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), - vec_revb(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a))); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), - vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_reve(a)))); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i = _mm_shuffle_epi8(a_.m128i, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_shuffle(a_.v128, a_.v128, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i ^ 7]; - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64q_s8 - #define vrev64q_s8(a) simde_vrev64q_s8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vrev64q_s16(simde_int16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64q_s16(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), - vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_reve(a)))); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i = _mm_shuffle_epi8(a_.m128i, _mm_set_epi8(9, 8, 11, 10, 13, 12, 15, 14, - 1, 0, 3, 2, 5, 4, 7, 6)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = 
_mm_shufflehi_epi16(_mm_shufflelo_epi16(a_.m128i, - (0 << 6) | (1 << 4) | (2 << 2) | (3 << 0)), - (0 << 6) | (1 << 4) | (2 << 2) | (3 << 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_shuffle(a_.v128, a_.v128, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 3, 2, 1, 0, 7, 6, 5, 4); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i ^ 3]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64q_s16 - #define vrev64q_s16(a) simde_vrev64q_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vrev64q_s32(simde_int32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64q_s32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), - vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_reve(a)))); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_shuffle_epi32(a_.m128i, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i8x16_shuffle(a_.v128, a_.v128, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 0, 3, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i ^ 1]; - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64q_s32 - #define vrev64q_s32(a) simde_vrev64q_s32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vrev64q_u8(simde_uint8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64q_u8(a); - #else - return simde_vreinterpretq_u8_s8(simde_vrev64q_s8(simde_vreinterpretq_s8_u8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64q_u8 - #define vrev64q_u8(a) simde_vrev64q_u8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vrev64q_u16(simde_uint16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64q_u16(a); - #else - return simde_vreinterpretq_u16_s16(simde_vrev64q_s16(simde_vreinterpretq_s16_u16(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64q_u16 - #define vrev64q_u16(a) simde_vrev64q_u16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vrev64q_u32(simde_uint32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64q_u32(a); - #else - return simde_vreinterpretq_u32_s32(simde_vrev64q_s32(simde_vreinterpretq_s32_u32(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64q_u32 - #define vrev64q_u32(a) simde_vrev64q_u32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vrev64q_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrev64q_f16(a); - #else - return simde_vreinterpretq_f16_s16(simde_vrev64q_s16(simde_vreinterpretq_s16_f16(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64q_f16 - #define vrev64q_f16(a) simde_vrev64q_f16(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrev64q_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64q_f32(a); - #else - return simde_vreinterpretq_f32_s32(simde_vrev64q_s32(simde_vreinterpretq_s32_f32(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64q_f32 - #define vrev64q_f32(a) simde_vrev64q_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x8_t -simde_vrev64_p8(simde_poly8x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64_p8(a); - #else - return simde_vreinterpret_p8_s8(simde_vrev64_s8(simde_vreinterpret_s8_p8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64_p8 - #define vrev64_p8(a) simde_vrev64_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x4_t -simde_vrev64_p16(simde_poly16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64_p16(a); - #else - return simde_vreinterpret_p16_s16(simde_vrev64_s16(simde_vreinterpret_s16_p16(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64_p16 - #define vrev64_p16(a) simde_vrev64_p16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly8x16_t -simde_vrev64q_p8(simde_poly8x16_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64q_p8(a); - #else - return simde_vreinterpretq_p8_s8(simde_vrev64q_s8(simde_vreinterpretq_s8_p8(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64q_p8 - #define vrev64q_p8(a) simde_vrev64q_p8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_poly16x8_t -simde_vrev64q_p16(simde_poly16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrev64q_p16(a); - #else - return simde_vreinterpretq_p16_s16(simde_vrev64q_s16(simde_vreinterpretq_s16_p16(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrev64q_p16 - #define vrev64q_p16(a) simde_vrev64q_p16(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_REV64_H) */ -/* :: End simde/arm/neon/rev64.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rhadd.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - */ - -/* Formula to average two unsigned integers without overflow is from Hacker's Delight (ISBN 978-0-321-84268-8). - * https://web.archive.org/web/20180831033349/http://hackersdelight.org/basics2.pdf#G525596 - * avg_u = (x | y) - ((x ^ y) >> 1); - * - * Formula to average two signed integers (without widening): - * avg_s = (x >> 1) + (y >> 1) + ((x | y) & 1); // use arithmetic shifts - * - * If hardware has avg_u but not avg_s then rebase input to be unsigned. - * For example: s8 (-128..127) can be converted to u8 (0..255) by adding +128. - * Idea borrowed from Intel's ARM_NEON_2_x86_SSE project. - * https://github.com/intel/ARM_NEON_2_x86_SSE/blob/3c9879bf2dbef3274e0ed20f93cb8da3a2115ba1/NEON_2_SSE.h#L3171 - * avg_s8 = avg_u8(a ^ 0x80, b ^ 0x80) ^ 0x80; - */ - -#if !defined(SIMDE_ARM_NEON_RHADD_H) -#define SIMDE_ARM_NEON_RHADD_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vrhadd_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrhadd_s8(a, b); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int8_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int8_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int8_t, 1))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int8_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int8_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int8_t, 1))); - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrhadd_s8 - #define vrhadd_s8(a, b) simde_vrhadd_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vrhadd_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrhadd_s16(a, b); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_add_pi16(_m_pand(_m_por(a_.m64, b_.m64), _mm_set1_pi16(HEDLEY_STATIC_CAST(int16_t, 1))), - _mm_add_pi16(_m_psrawi(a_.m64, 1), _m_psrawi(b_.m64, 1))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100760) - r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int16_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int16_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int16_t, 1))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int16_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int16_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int16_t, 1))); - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrhadd_s16 - #define vrhadd_s16(a, b) simde_vrhadd_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vrhadd_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return 
vrhadd_s32(a, b); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_add_pi32(_m_pand(_m_por(a_.m64, b_.m64), _mm_set1_pi32(HEDLEY_STATIC_CAST(int32_t, 1))), - _mm_add_pi32(_m_psradi(a_.m64, 1), _m_psradi(b_.m64, 1))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100760) - r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int32_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int32_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int32_t, 1))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int32_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int32_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int32_t, 1))); - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrhadd_s32 - #define vrhadd_s32(a, b) simde_vrhadd_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vrhadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrhadd_u8(a, b); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) - r_.values = (((a_.values >> HEDLEY_STATIC_CAST(uint8_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(uint8_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(uint8_t, 1))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(uint8_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(uint8_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(uint8_t, 1))); - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrhadd_u8 - #define vrhadd_u8(a, b) simde_vrhadd_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vrhadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrhadd_u16(a, b); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_add_pi16(_m_pand(_m_por(a_.m64, b_.m64), _mm_set1_pi16(HEDLEY_STATIC_CAST(int16_t, 1))), - _mm_add_pi16(_mm_srli_pi16(a_.m64, 1), _mm_srli_pi16(b_.m64, 1))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100760) - r_.values = (((a_.values >> HEDLEY_STATIC_CAST(uint16_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(uint16_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(uint16_t, 1))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(uint16_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(uint16_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(uint16_t, 1))); - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrhadd_u16 - #define vrhadd_u16(a, b) simde_vrhadd_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vrhadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return 
vrhadd_u32(a, b); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a), - b_ = simde_uint32x2_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_add_pi32(_m_pand(_m_por(a_.m64, b_.m64), _mm_set1_pi32(HEDLEY_STATIC_CAST(int32_t, 1))), - _mm_add_pi32(_mm_srli_pi32(a_.m64, 1), _mm_srli_pi32(b_.m64, 1))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100760) - r_.values = (((a_.values >> HEDLEY_STATIC_CAST(uint32_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(uint32_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(uint32_t, 1))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(uint32_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(uint32_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(uint32_t, 1))); - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrhadd_u32 - #define vrhadd_u32(a, b) simde_vrhadd_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vrhaddq_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrhaddq_s8(a, b); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - const __m128i msb = _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, -128)); /* 0x80 */ - r_.m128i = _mm_xor_si128(_mm_avg_epu8(_mm_xor_si128(a_.m128i, msb), _mm_xor_si128(b_.m128i, msb)), msb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t msb = wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, -128)); /* 0x80 */ - r_.v128 = wasm_v128_xor(wasm_u8x16_avgr(wasm_v128_xor(a_.v128, msb), wasm_v128_xor(b_.v128, msb)), msb); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int8_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int8_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int8_t, 1))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int8_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int8_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int8_t, 1))); - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrhaddq_s8 - #define vrhaddq_s8(a, b) simde_vrhaddq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vrhaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrhaddq_s16(a, b); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - const __m128i msb = _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, -32768)); /* 0x8000 */ - r_.m128i = _mm_xor_si128(_mm_avg_epu16(_mm_xor_si128(a_.m128i, msb), _mm_xor_si128(b_.m128i, msb)), msb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t msb = wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, -32768)); /* 0x8000 */ - r_.v128 = wasm_v128_xor(wasm_u16x8_avgr(wasm_v128_xor(a_.v128, msb), wasm_v128_xor(b_.v128, msb)), msb); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int16_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int16_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int16_t, 1))); - #else 
- SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int16_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int16_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int16_t, 1))); - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrhaddq_s16 - #define vrhaddq_s16(a, b) simde_vrhaddq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vrhaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrhaddq_s32(a, b); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_add_epi32(_mm_and_si128(_mm_or_si128(a_.m128i, b_.m128i), _mm_set1_epi32(1)), - _mm_add_epi32(_mm_srai_epi32(a_.m128i, 1), _mm_srai_epi32(b_.m128i, 1))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_add(wasm_v128_and(wasm_v128_or(a_.v128, b_.v128), wasm_i32x4_splat(1)), - wasm_i32x4_add(wasm_i32x4_shr(a_.v128, 1), wasm_i32x4_shr(b_.v128, 1))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int32_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int32_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int32_t, 1))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int32_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int32_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int32_t, 1))); - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrhaddq_s32 - #define vrhaddq_s32(a, b) simde_vrhaddq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vrhaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrhaddq_u8(a, b); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a), - b_ = simde_uint8x16_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_avg_epu8(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u8x16_avgr(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = (a_.values | b_.values) - ((a_.values ^ b_.values) >> HEDLEY_STATIC_CAST(uint8_t, 1)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] | b_.values[i]) - ((a_.values[i] ^ b_.values[i]) >> HEDLEY_STATIC_CAST(uint8_t, 1)); - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrhaddq_u8 - #define vrhaddq_u8(a, b) simde_vrhaddq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vrhaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrhaddq_u16(a, b); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a), - b_ = simde_uint16x8_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_avg_epu16(a_.m128i, b_.m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_u16x8_avgr(a_.v128, b_.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = (a_.values | b_.values) - ((a_.values ^ b_.values) >> 
HEDLEY_STATIC_CAST(uint16_t, 1)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] | b_.values[i]) - ((a_.values[i] ^ b_.values[i]) >> HEDLEY_STATIC_CAST(uint16_t, 1)); - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrhaddq_u16 - #define vrhaddq_u16(a, b) simde_vrhaddq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vrhaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrhaddq_u32(a, b); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a), - b_ = simde_uint32x4_to_private(b); - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i = _mm_sub_epi32(_mm_or_si128(a_.m128i, b_.m128i), _mm_srli_epi32(_mm_xor_si128(a_.m128i, b_.m128i), 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.v128 = wasm_i32x4_sub(wasm_v128_or(a_.v128, b_.v128), wasm_u32x4_shr(wasm_v128_xor(a_.v128, b_.v128), 1)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.values = (a_.values | b_.values) - ((a_.values ^ b_.values) >> HEDLEY_STATIC_CAST(uint32_t, 1)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (a_.values[i] | b_.values[i]) - ((a_.values[i] ^ b_.values[i]) >> HEDLEY_STATIC_CAST(uint32_t, 1)); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrhaddq_u32 - #define vrhaddq_u32(a, b) simde_vrhaddq_u32((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RHADD_H) */ -/* :: End simde/arm/neon/rhadd.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rnd.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RND_H) -#define SIMDE_ARM_NEON_RND_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vrndh_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndh_f16(a); - #else - return simde_float16_from_float32(simde_math_truncf(simde_float16_to_float32(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndh_f16 - #define vrndh_f16(a) simde_vrndh_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vrnd_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrnd_f16(a); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndh_f16(a_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrnd_f16 - #define vrnd_f16(a) simde_vrnd_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrnd_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vrnd_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_truncf(a_.values[i]); - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrnd_f32 - #define vrnd_f32(a) simde_vrnd_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrnd_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrnd_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_trunc(a_.values[i]); - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd_f64 - #define vrnd_f64(a) simde_vrnd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vrndq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndq_f16(a); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndh_f16(a_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndq_f16 - #define vrndq_f16(a) simde_vrndq_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrndq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vrndq_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_trunc(a); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128 = _mm_round_ps(a_.m128, _MM_FROUND_TO_ZERO); - #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - 
r_.m128 = _mm_trunc_ps(a_.m128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_truncf(a_.values[i]); - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrndq_f32 - #define vrndq_f32(a) simde_vrndq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vrndq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrndq_f64(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_trunc(a); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128d = _mm_round_pd(a_.m128d, _MM_FROUND_TO_ZERO); - #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - r_.m128d = _mm_trunc_pd(a_.m128d); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_trunc(a_.values[i]); - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrndq_f64 - #define vrndq_f64(a) simde_vrndq_f64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RND_H) */ -/* :: End simde/arm/neon/rnd.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rnd32x.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RND32X_H) -#define SIMDE_ARM_NEON_RND32X_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -// src: https://gcc.gnu.org/legacy-ml/gcc-patches/2019-09/msg00053.html -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrnd32x_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd32x_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (simde_math_isnanf(a_.values[i]) || simde_math_isinff(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT32_MIN); - } else { - r_.values[i] = simde_math_rintf(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(float, INT32_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(float, INT32_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT32_MIN); - } - } - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd32x_f32 - #define vrnd32x_f32(a) simde_vrnd32x_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrnd32x_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd32x_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (simde_math_isnan(a_.values[i]) || simde_math_isinf(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT32_MIN); - } else { - r_.values[i] = simde_math_rint(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(double, INT32_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(double, INT32_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT32_MIN); - } - } - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd32x_f64 - #define vrnd32x_f64(a) simde_vrnd32x_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrnd32xq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd32xq_f32(a); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (simde_math_isnanf(a_.values[i]) || simde_math_isinff(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT32_MIN); - } else { - r_.values[i] = simde_math_rintf(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(float, INT32_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(float, INT32_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT32_MIN); - } - } - } - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd32xq_f32 - #define vrnd32xq_f32(a) simde_vrnd32xq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vrnd32xq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd32xq_f64(a); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { - if (simde_math_isnan(a_.values[i]) || simde_math_isinf(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT32_MIN); - } else { - r_.values[i] = simde_math_rint(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(double, INT32_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(double, INT32_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT32_MIN); - } - } - } - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd32xq_f64 - #define vrnd32xq_f64(a) simde_vrnd32xq_f64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RND32X_H) */ -/* :: End simde/arm/neon/rnd32x.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rnd32z.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RND32Z_H) -#define SIMDE_ARM_NEON_RND32Z_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -// src: https://gcc.gnu.org/legacy-ml/gcc-patches/2019-09/msg00053.html -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrnd32z_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd32z_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (simde_math_isnanf(a_.values[i]) || simde_math_isinff(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT32_MIN); - } else { - r_.values[i] = simde_math_truncf(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(float, INT32_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(float, INT32_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT32_MIN); - } - } - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd32z_f32 - #define vrnd32z_f32(a) simde_vrnd32z_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrnd32z_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd32z_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (simde_math_isnan(a_.values[i]) || simde_math_isinf(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT32_MIN); - } else { - r_.values[i] = simde_math_trunc(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(double, INT32_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(double, INT32_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT32_MIN); - } - } - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd32z_f64 - #define vrnd32z_f64(a) simde_vrnd32z_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrnd32zq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd32zq_f32(a); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (simde_math_isnanf(a_.values[i]) || simde_math_isinff(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT32_MIN); - } else { - r_.values[i] = simde_math_truncf(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(float, INT32_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(float, INT32_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT32_MIN); - } - } - } - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd32zq_f32 - #define vrnd32zq_f32(a) simde_vrnd32zq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vrnd32zq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd32zq_f64(a); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { - if (simde_math_isnan(a_.values[i]) || simde_math_isinf(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT32_MIN); - } else { - r_.values[i] = simde_math_trunc(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(double, INT32_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(double, INT32_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT32_MIN); - } - } - } - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd32zq_f64 - #define vrnd32zq_f64(a) simde_vrnd32zq_f64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RND32Z_H) */ -/* :: End simde/arm/neon/rnd32z.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rnd64x.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RND64X_H) -#define SIMDE_ARM_NEON_RND64X_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -// src: https://gcc.gnu.org/legacy-ml/gcc-patches/2019-09/msg00053.html -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrnd64x_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd64x_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (simde_math_isnanf(a_.values[i]) || simde_math_isinff(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT64_MIN); - } else { - r_.values[i] = simde_math_rintf(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(float, INT64_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(float, INT64_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT64_MIN); - } - } - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd64x_f32 - #define vrnd64x_f32(a) simde_vrnd64x_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrnd64x_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd64x_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (simde_math_isnan(a_.values[i]) || simde_math_isinf(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT64_MIN); - } else { - r_.values[i] = simde_math_rint(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(double, INT64_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(double, INT64_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT64_MIN); - } - } - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd64x_f64 - #define vrnd64x_f64(a) simde_vrnd64x_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrnd64xq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd64xq_f32(a); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (simde_math_isnanf(a_.values[i]) || simde_math_isinff(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT64_MIN); - } else { - r_.values[i] = simde_math_rintf(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(float, INT64_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(float, INT64_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT64_MIN); - } - } - } - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd64xq_f32 - #define vrnd64xq_f32(a) simde_vrnd64xq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vrnd64xq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd64xq_f64(a); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { - if (simde_math_isnan(a_.values[i]) || simde_math_isinf(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT64_MIN); - } else { - r_.values[i] = simde_math_rint(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(double, INT64_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(double, INT64_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT64_MIN); - } - } - } - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd64xq_f64 - #define vrnd64xq_f64(a) simde_vrnd64xq_f64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RND64X_H) */ -/* :: End simde/arm/neon/rnd64x.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rnd64z.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RND64Z_H) -#define SIMDE_ARM_NEON_RND64Z_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -// src: https://gcc.gnu.org/legacy-ml/gcc-patches/2019-09/msg00053.html -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrnd64z_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd64z_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (simde_math_isnanf(a_.values[i]) || simde_math_isinff(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT64_MIN); - } else { - r_.values[i] = simde_math_truncf(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(float, INT64_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(float, INT64_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT64_MIN); - } - } - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd64z_f32 - #define vrnd64z_f32(a) simde_vrnd64z_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrnd64z_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd64z_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (simde_math_isnan(a_.values[i]) || simde_math_isinf(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT64_MIN); - } else { - r_.values[i] = simde_math_trunc(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(double, INT64_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(double, INT64_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT64_MIN); - } - } - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd64z_f64 - #define vrnd64z_f64(a) simde_vrnd64z_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrnd64zq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd64zq_f32(a); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - if (simde_math_isnanf(a_.values[i]) || simde_math_isinff(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT64_MIN); - } else { - r_.values[i] = simde_math_truncf(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(float, INT64_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(float, INT64_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(float, INT64_MIN); - } - } - } - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd64zq_f32 - #define vrnd64zq_f32(a) simde_vrnd64zq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vrnd64zq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_FRINT) - return vrnd64zq_f64(a); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { - if (simde_math_isnan(a_.values[i]) || simde_math_isinf(a_.values[i])) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT64_MIN); - } else { - r_.values[i] = simde_math_trunc(a_.values[i]); - if (r_.values[i] > HEDLEY_STATIC_CAST(double, INT64_MAX) || r_.values[i] < HEDLEY_STATIC_CAST(double, INT64_MIN)) { - r_.values[i] = HEDLEY_STATIC_CAST(double, INT64_MIN); - } - } - } - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnd64zq_f64 - #define vrnd64zq_f64(a) simde_vrnd64zq_f64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RND64Z_H) */ -/* :: End simde/arm/neon/rnd64z.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rnda.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RNDA_H) -#define SIMDE_ARM_NEON_RNDA_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vrndah_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndah_f16(a); - #else - return simde_float16_from_float32(simde_math_roundf(simde_float16_to_float32(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndah_f16 - #define vrndah_f16(a) simde_vrndah_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vrnda_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrnda_f16(a); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndah_f16(a_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrnda_f16 - #define vrnda_f16(a) simde_vrnda_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrnda_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vrnda_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_roundf(a_.values[i]); - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrnda_f32 - #define vrnda_f32(a) simde_vrnda_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrnda_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrnda_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_round(a_.values[i]); - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrnda_f64 - #define vrnda_f64(a) simde_vrnda_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vrndaq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndaq_f16(a); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndah_f16(a_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndaq_f16 - #define vrndaq_f16(a) simde_vrndaq_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrndaq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vrndaq_f32(a); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_roundf(a_.values[i]); - } - - return simde_float32x4_from_private(r_); - #endif -} -#if 
defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndaq_f32 - #define vrndaq_f32(a) simde_vrndaq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vrndaq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrndaq_f64(a); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_round(a_.values[i]); - } - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrndaq_f64 - #define vrndaq_f64(a) simde_vrndaq_f64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RNDA_H) */ -/* :: End simde/arm/neon/rnda.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rndm.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020-2021 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RNDM_H) -#define SIMDE_ARM_NEON_RNDM_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vrndmh_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndmh_f16(a); - #else - return simde_float16_from_float32(simde_math_floorf(simde_float16_to_float32(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndmh_f16 - #define vrndmh_f16(a) simde_vrndmh_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vrndm_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndm_f16(a); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndmh_f16(a_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndm_f16 - #define vrndm_f16(a) simde_vrndm_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrndm_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vrndm_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_floorf(a_.values[i]); - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrndm_f32 - #define vrndm_f32(a) simde_vrndm_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrndm_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrndm_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_floor(a_.values[i]); - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrndm_f64 - #define vrndm_f64(a) simde_vrndm_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vrndmq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndmq_f16(a); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndmh_f16(a_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndmq_f16 - #define vrndmq_f16(a) simde_vrndmq_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrndmq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vrndmq_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_floor(a); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128 = _mm_round_ps(a_.m128, _MM_FROUND_TO_NEG_INF); - #elif 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - r_.m128 = _mm_floor_ps(a_.m128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_floorf(a_.values[i]); - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrndmq_f32 - #define vrndmq_f32(a) simde_vrndmq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vrndmq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrndmq_f64(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_floor(a); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128d = _mm_round_pd(a_.m128d, _MM_FROUND_TO_NEG_INF); - #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - r_.m128d = _mm_floor_pd(a_.m128d); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_floor(a_.values[i]); - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrndmq_f64 - #define vrndmq_f64(a) simde_vrndmq_f64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RNDM_H) */ -/* :: End simde/arm/neon/rndm.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rndi.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020-2021 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RNDI_H) -#define SIMDE_ARM_NEON_RNDI_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vrndih_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) && defined(SIMDE_ARM_NEON_FP16) - return vrndih_f16(a); - #else - return simde_float16_from_float32(simde_math_nearbyintf(simde_float16_to_float32(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndih_f16 - #define vrndih_f16(a) simde_vrndih_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vrndi_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) && defined(SIMDE_ARM_NEON_FP16) - return vrndi_f16(a); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndih_f16(a_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrndi_f16 - #define vrndi_f16(a) simde_vrndi_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrndi_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - return vrndi_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_nearbyintf(a_.values[i]); - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrndi_f32 - #define vrndi_f32(a) simde_vrndi_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrndi_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - return vrndi_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_nearbyint(a_.values[i]); - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrndi_f64 - #define vrndi_f64(a) simde_vrndi_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vrndiq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) && defined(SIMDE_ARM_NEON_FP16) - return vrndiq_f16(a); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndih_f16(a_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrndiq_f16 - #define vrndiq_f16(a) simde_vrndiq_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrndiq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - return vrndiq_f32(a); - #else - simde_float32x4_private - r_, - a_ = 
simde_float32x4_to_private(a); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128 = _mm_round_ps(a_.m128, _MM_FROUND_CUR_DIRECTION); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_nearbyintf(a_.values[i]); - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrndiq_f32 - #define vrndiq_f32(a) simde_vrndiq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vrndiq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - return vrndiq_f64(a); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128d = _mm_round_pd(a_.m128d, _MM_FROUND_CUR_DIRECTION); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_nearbyint(a_.values[i]); - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrndiq_f64 - #define vrndiq_f64(a) simde_vrndiq_f64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RNDI_H) */ -/* :: End simde/arm/neon/rndi.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rndn.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020-2021 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RNDN_H) -#define SIMDE_ARM_NEON_RNDN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vrndnh_f16(simde_float16_t a) { - #if \ - defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && \ - (!defined(HEDLEY_GCC_VERSION) || (defined(SIMDE_ARM_NEON_A64V8_NATIVE) && HEDLEY_GCC_VERSION_CHECK(8,0,0))) && defined(SIMDE_ARM_NEON_FP16) - return vrndnh_f16(a); - #else - simde_float32_t a_ = simde_float16_to_float32(a); - return simde_float16_from_float32(simde_math_roundevenf(a_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndnh_f16 - #define vrndnh_f16(a) simde_vrndnh_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vrndns_f32(simde_float32_t a) { - #if \ - defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && \ - (!defined(HEDLEY_GCC_VERSION) || (defined(SIMDE_ARM_NEON_A64V8_NATIVE) && HEDLEY_GCC_VERSION_CHECK(8,0,0))) - return vrndns_f32(a); - #else - return simde_math_roundevenf(a); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndns_f32 - #define vrndns_f32(a) simde_vrndns_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vrndn_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndn_f16(a); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndnh_f16(a_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndn_f16 - #define vrndn_f16(a) simde_vrndn_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrndn_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vrndn_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndns_f32(a_.values[i]); - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndn_f32 - #define vrndn_f32(a) simde_vrndn_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrndn_f64(simde_float64x1_t a) { - #if \ - defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrndn_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_roundeven(a_.values[i]); - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndn_f64 - #define vrndn_f64(a) simde_vrndn_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vrndnq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndnq_f16(a); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a); - - 
SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndnh_f16(a_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndnq_f16 - #define vrndnq_f16(a) simde_vrndnq_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrndnq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vrndnq_f32(a); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128 = _mm_round_ps(a_.m128, _MM_FROUND_TO_NEAREST_INT); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndns_f32(a_.values[i]); - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndnq_f32 - #define vrndnq_f32(a) simde_vrndnq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vrndnq_f64(simde_float64x2_t a) { - #if \ - defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrndnq_f64(a); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128d = _mm_round_pd(a_.m128d, _MM_FROUND_TO_NEAREST_INT); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_roundeven(a_.values[i]); - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndnq_f64 - #define vrndnq_f64(a) simde_vrndnq_f64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RNDN_H) */ -/* :: End simde/arm/neon/rndn.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rndp.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020-2021 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RNDP_H) -#define SIMDE_ARM_NEON_RNDP_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vrndph_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndph_f16(a); - #else - return simde_float16_from_float32(simde_math_ceilf(simde_float16_to_float32(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndph_f16 - #define vrndph_f16(a) simde_vrndph_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vrndp_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndp_f16(a); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndph_f16(a_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndp_f16 - #define vrndp_f16(a) simde_vrndp_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrndp_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vrndp_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_ceilf(a_.values[i]); - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrndp_f32 - #define vrndp_f32(a) simde_vrndp_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrndp_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrndp_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_ceil(a_.values[i]); - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrndp_f64 - #define vrndp_f64(a) simde_vrndp_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vrndpq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndpq_f16(a); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndph_f16(a_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndpq_f16 - #define vrndpq_f16(a) simde_vrndpq_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrndpq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vrndpq_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_ceil(a); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128 = _mm_round_ps(a_.m128, _MM_FROUND_TO_POS_INF); - #elif defined(SIMDE_X86_SVML_NATIVE) 
&& defined(SIMDE_X86_SSE_NATIVE) - r_.m128 = _mm_ceil_ps(a_.m128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_ceilf(a_.values[i]); - } - #endif - - return simde_float32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrndpq_f32 - #define vrndpq_f32(a) simde_vrndpq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vrndpq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrndpq_f64(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_ceil(a); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - r_.m128d = _mm_round_pd(a_.m128d, _MM_FROUND_TO_POS_INF); - #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - r_.m128d = _mm_ceil_pd(a_.m128d); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_ceil(a_.values[i]); - } - #endif - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrndpq_f64 - #define vrndpq_f64(a) simde_vrndpq_f64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RNDP_H) */ -/* :: End simde/arm/neon/rndp.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rndx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RNDX_H) -#define SIMDE_ARM_NEON_RNDX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vrndxh_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndxh_f16(a); - #else - return simde_float16_from_float32(simde_math_rintf(simde_float16_to_float32(a))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndxh_f16 - #define vrndxh_f16(a) simde_vrndxh_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vrndx_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndx_f16(a); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndxh_f16(a_.values[i]); - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndx_f16 - #define vrndx_f16(a) simde_vrndx_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrndx_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vrndx_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_rintf(a_.values[i]); - } - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndx_f32 - #define vrndx_f32(a) simde_vrndx_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrndx_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrndx_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_rint(a_.values[i]); - } - - return simde_float64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrndx_f64 - #define vrndx_f64(a) simde_vrndx_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vrndxq_f16(simde_float16x8_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrndxq_f16(a); - #else - simde_float16x8_private - r_, - a_ = simde_float16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrndxh_f16(a_.values[i]); - } - - return simde_float16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndxq_f16 - #define vrndxq_f16(a) simde_vrndxq_f16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vrndxq_f32(simde_float32x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - return vrndxq_f32(a); - #else - simde_float32x4_private - r_, - a_ = simde_float32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_rintf(a_.values[i]); - } - - return simde_float32x4_from_private(r_); - #endif -} -#if 
defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrndxq_f32 - #define vrndxq_f32(a) simde_vrndxq_f32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vrndxq_f64(simde_float64x2_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrndxq_f64(a); - #else - simde_float64x2_private - r_, - a_ = simde_float64x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_math_rint(a_.values[i]); - } - - return simde_float64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrndxq_f64 - #define vrndxq_f64(a) simde_vrndxq_f64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RNDX_H) */ -/* :: End simde/arm/neon/rndx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rshl.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_ARM_NEON_RSHL_H) -#define SIMDE_ARM_NEON_RSHL_H -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Notes from the implementer (Christopher Moore aka rosbif) - * - * I have tried to exactly reproduce the documented behaviour of the - * ARM NEON rshl and rshlq intrinsics. - * This is complicated for the following reasons:- - * - * a) Negative shift counts shift right. - * - * b) Only the low byte of the shift count is used but the shift count - * is not limited to 8-bit values (-128 to 127). - * - * c) Overflow must be avoided when rounding, together with sign change - * warning/errors in the C versions. - * - * d) Intel SIMD is not nearly as complete as NEON and AltiVec. - * There were no intrisics with a vector shift count before AVX2 which - * only has 32 and 64-bit logical ones and only a 32-bit arithmetic - * one. The others need AVX512. There are no 8-bit shift intrinsics at - * all, even with a scalar shift count. It is surprising to use AVX2 - * and even AVX512 to implement a 64-bit vector operation. - * - * e) Many shift implementations, and the C standard, do not treat a - * shift count >= the object's size in bits as one would expect. 
- * (Personally I feel that > is silly but == can be useful.) - * - * Note that even the C17/18 standard does not define the behaviour of - * a right shift of a negative value. - * However Evan and I agree that all compilers likely to be used - * implement this as an arithmetic right shift with sign extension. - * If this is not the case it could be replaced by a logical right shift - * if negative values are complemented before and after the shift. - * - * Some of the SIMD translations may be slower than the portable code, - * particularly those for vectors with only one or two elements. - * But I had fun writing them ;-) - * - */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vrshld_s64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrshld_s64(a, b); - #else - b = HEDLEY_STATIC_CAST(int8_t, b); - return - (simde_math_llabs(b) >= 64) - ? 0 - : (b >= 0) - ? (a << b) - : ((a + (INT64_C(1) << (-b - 1))) >> -b); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrshld_s64 - #define vrshld_s64(a, b) simde_vrshld_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vrshld_u64(uint64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrshld_u64(a, HEDLEY_STATIC_CAST(int64_t, b)); - #else - b = HEDLEY_STATIC_CAST(int8_t, b); - return - (b >= 64) ? 0 : - (b >= 0) ? (a << b) : - (b >= -64) ? (((b == -64) ? 0 : (a >> -b)) + ((a >> (-b - 1)) & 1)) : 0; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrshld_u64 - #define vrshld_u64(a, b) simde_vrshld_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vrshl_s8 (const simde_int8x8_t a, const simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshl_s8(a, b); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ff = _mm_cmpeq_epi16(zero, zero); - __m128i a128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(a_.m64)); - __m128i b128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(b_.m64)); - __m128i a128_shr = _mm_srav_epi16(a128, _mm_xor_si128(b128, ff)); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi16(a128, b128), - _mm_srai_epi16(_mm_sub_epi16(a128_shr, ff), 1), - _mm_cmpgt_epi16(zero, b128)); - r_.m64 = _mm_movepi64_pi64(_mm_cvtepi16_epi8(r128)); - #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - const __m256i zero = _mm256_setzero_si256(); - const __m256i ff = _mm256_cmpeq_epi32(zero, zero); - __m256i a256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(a_.m64)); - __m256i b256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(b_.m64)); - __m256i a256_shr = _mm256_srav_epi32(a256, _mm256_xor_si256(b256, ff)); - __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), - _mm256_srai_epi32(_mm256_sub_epi32(a256_shr, ff), 1), - _mm256_cmpgt_epi32(zero, b256)); - r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi32(0x0C080400)); - r_.m64 = _mm_set_pi32(simde_mm256_extract_epi32(r256, 4), simde_mm256_extract_epi32(r256, 0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int8_t, - (simde_math_abs(b_.values[i]) >= 8) ? 0 : - (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : - ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshl_s8 - #define vrshl_s8(a, b) simde_vrshl_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vrshl_s16 (const simde_int16x4_t a, const simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshl_s16(a, b); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ff = _mm_cmpeq_epi32(zero, zero); - __m128i a128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(a_.m64)); - __m128i b128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(b_.m64)); - b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); - __m128i a128_shr = _mm_srav_epi32(a128, _mm_xor_si128(b128, ff)); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), - _mm_srai_epi32(_mm_sub_epi32(a128_shr, ff), 1), - _mm_cmpgt_epi32(zero, b128)); - r_.m64 = _mm_movepi64_pi64(_mm_shuffle_epi8(r128, _mm_set1_epi64x(0x0D0C090805040100))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, - (simde_math_abs(b_.values[i]) >= 16) ? 0 : - (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : - ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshl_s16 - #define vrshl_s16(a, b) simde_vrshl_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vrshl_s32 (const simde_int32x2_t a, const simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshl_s32(a, b); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ff = _mm_cmpeq_epi32(zero, zero); - __m128i a128 = _mm_movpi64_epi64(a_.m64); - __m128i b128 = _mm_movpi64_epi64(b_.m64); - b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); - __m128i a128_shr = _mm_srav_epi32(a128, _mm_xor_si128(b128, ff)); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), - _mm_srai_epi32(_mm_sub_epi32(a128_shr, ff), 1), - _mm_cmpgt_epi32(zero, b128)); - r_.m64 = _mm_movepi64_pi64(r128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(int32_t, - (simde_math_abs(b_.values[i]) >= 32) ? 0 : - (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : - ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshl_s32 - #define vrshl_s32(a, b) simde_vrshl_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vrshl_s64 (const simde_int64x1_t a, const simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshl_s64(a, b); - #else - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b); - - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ff = _mm_cmpeq_epi64(zero, zero); - __m128i a128 = _mm_movpi64_epi64(a_.m64); - __m128i b128 = _mm_movpi64_epi64(b_.m64); - b128 = _mm_srai_epi64(_mm_slli_epi64(b128, 56), 56); - __m128i a128_shr = _mm_srav_epi64(a128, _mm_xor_si128(b128, ff)); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128), - _mm_srai_epi64(_mm_sub_epi64(a128_shr, ff), 1), - _mm_cmpgt_epi64(zero, b128)); - r_.m64 = _mm_movepi64_pi64(r128); - #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ones = _mm_set1_epi64x(1); - __m128i a128 = _mm_movpi64_epi64(a_.m64); - __m128i b128 = _mm_movpi64_epi64(b_.m64); - __m128i maska = _mm_cmpgt_epi64(zero, a128); - __m128i b128_abs = _mm_and_si128(_mm_abs_epi8(b128), _mm_set1_epi64x(0xFF)); - __m128i a128_rnd = _mm_and_si128(_mm_srlv_epi64(a128, _mm_sub_epi64(b128_abs, ones)), ones); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128_abs), - _mm_add_epi64(_mm_xor_si128(_mm_srlv_epi64(_mm_xor_si128(a128, maska), b128_abs), maska), a128_rnd), - _mm_cmpgt_epi64(zero, _mm_slli_epi64(b128, 56))); - r_.m64 = _mm_movepi64_pi64(r128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrshld_s64(a_.values[i], b_.values[i]); - } - #endif - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshl_s64 - #define vrshl_s64(a, b) simde_vrshl_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vrshl_u8 (const simde_uint8x8_t a, const simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshl_u8(a, b); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a); - simde_int8x8_private b_ = simde_int8x8_to_private(b); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ff = _mm_cmpeq_epi16(zero, zero); - __m128i a128 = _mm_cvtepu8_epi16(_mm_movpi64_epi64(a_.m64)); - __m128i b128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(b_.m64)); - __m128i a128_shr = _mm_srlv_epi16(a128, _mm_xor_si128(b128, ff)); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi16(a128, b128), - _mm_srli_epi16(_mm_sub_epi16(a128_shr, ff), 1), - _mm_cmpgt_epi16(zero, b128)); - r_.m64 = _mm_movepi64_pi64(_mm_cvtepi16_epi8(r128)); - #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - const __m256i zero = _mm256_setzero_si256(); - const __m256i ff = _mm256_cmpeq_epi32(zero, zero); - __m256i a256 = _mm256_cvtepu8_epi32(_mm_movpi64_epi64(a_.m64)); - __m256i b256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(b_.m64)); - __m256i a256_shr = _mm256_srlv_epi32(a256, 
_mm256_xor_si256(b256, ff)); - __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), - _mm256_srli_epi32(_mm256_sub_epi32(a256_shr, ff), 1), - _mm256_cmpgt_epi32(zero, b256)); - r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi32(0x0C080400)); - r_.m64 = _mm_set_pi32(simde_mm256_extract_epi32(r256, 4), simde_mm256_extract_epi32(r256, 0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, - (b_.values[i] >= 8) ? 0 : - (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : - (b_.values[i] >= -8) ? (((b_.values[i] == -8) ? 0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : - 0); - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshl_u8 - #define vrshl_u8(a, b) simde_vrshl_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vrshl_u16 (const simde_uint16x4_t a, const simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshl_u16(a, b); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a); - simde_int16x4_private b_ = simde_int16x4_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ff = _mm_cmpeq_epi32(zero, zero); - __m128i a128 = _mm_cvtepu16_epi32(_mm_movpi64_epi64(a_.m64)); - __m128i b128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(b_.m64)); - b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); - __m128i a128_shr = _mm_srlv_epi32(a128, _mm_xor_si128(b128, ff)); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), - _mm_srli_epi32(_mm_sub_epi32(a128_shr, ff), 1), - _mm_cmpgt_epi32(zero, b128)); - r_.m64 = _mm_movepi64_pi64(_mm_shuffle_epi8(r128, _mm_set1_epi64x(0x0D0C090805040100))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, - (b_.values[i] >= 16) ? 0 : - (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : - (b_.values[i] >= -16) ? (((b_.values[i] == -16) ? 
0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : - 0); - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshl_u16 - #define vrshl_u16(a, b) simde_vrshl_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vrshl_u32 (const simde_uint32x2_t a, const simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshl_u32(a, b); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a); - simde_int32x2_private b_ = simde_int32x2_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ff = _mm_cmpeq_epi32(zero, zero); - __m128i a128 = _mm_movpi64_epi64(a_.m64); - __m128i b128 = _mm_movpi64_epi64(b_.m64); - b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); - __m128i a128_shr = _mm_srlv_epi32(a128, _mm_xor_si128(b128, ff)); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), - _mm_srli_epi32(_mm_sub_epi32(a128_shr, ff), 1), - _mm_cmpgt_epi32(zero, b128)); - r_.m64 = _mm_movepi64_pi64(r128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = - (b_.values[i] >= 32) ? 0 : - (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : - (b_.values[i] >= -32) ? (((b_.values[i] == -32) ? 0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : - 0; - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshl_u32 - #define vrshl_u32(a, b) simde_vrshl_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vrshl_u64 (const simde_uint64x1_t a, const simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshl_u64(a, b); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a); - simde_int64x1_private b_ = simde_int64x1_to_private(b); - - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ff = _mm_cmpeq_epi64(zero, zero); - __m128i a128 = _mm_movpi64_epi64(a_.m64); - __m128i b128 = _mm_movpi64_epi64(b_.m64); - b128 = _mm_srai_epi64(_mm_slli_epi64(b128, 56), 56); - __m128i a128_shr = _mm_srlv_epi64(a128, _mm_xor_si128(b128, ff)); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128), - _mm_srli_epi64(_mm_sub_epi64(a128_shr, ff), 1), - _mm_cmpgt_epi64(zero, b128)); - r_.m64 = _mm_movepi64_pi64(r128); - #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - const __m128i ones = _mm_set1_epi64x(1); - const __m128i a128 = _mm_movpi64_epi64(a_.m64); - __m128i b128 = _mm_movpi64_epi64(b_.m64); - __m128i b128_abs = _mm_and_si128(_mm_abs_epi8(b128), _mm_set1_epi64x(0xFF)); - __m128i a128_shr = _mm_srlv_epi64(a128, _mm_sub_epi64(b128_abs, ones)); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128_abs), - _mm_srli_epi64(_mm_add_epi64(a128_shr, ones), 1), - _mm_cmpgt_epi64(_mm_setzero_si128(), _mm_slli_epi64(b128, 56))); - r_.m64 = _mm_movepi64_pi64(r128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrshld_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); -#endif -} -#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshl_u64 - #define vrshl_u64(a, b) simde_vrshl_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vrshlq_s8 (const simde_int8x16_t a, const simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshlq_s8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR( signed char) zero = vec_splats(HEDLEY_STATIC_CAST( signed char, 0)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 1)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) max = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 8)); - SIMDE_POWER_ALTIVEC_VECTOR(signed char) a_shr; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) b_abs; - - b_abs = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_abs(b)); - a_shr = vec_sra(a, vec_sub(b_abs, ones)); - return vec_and(vec_sel(vec_sl(a, b_abs), - vec_add(vec_sra(a_shr, ones), vec_and(a_shr, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), ones))), - vec_cmplt(b, zero)), - vec_cmplt(b_abs, max)); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - const __m256i zero = _mm256_setzero_si256(); - const __m256i ff = _mm256_cmpeq_epi16(zero, zero); - __m256i a256 = _mm256_cvtepi8_epi16(a_.m128i); - __m256i b256 = _mm256_cvtepi8_epi16(b_.m128i); - __m256i a256_shr = _mm256_srav_epi16(a256, _mm256_xor_si256(b256, ff)); - __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi16(a256, b256), - _mm256_srai_epi16(_mm256_sub_epi16(a256_shr, ff), 1), - _mm256_cmpgt_epi16(zero, b256)); - r_.m128i = _mm256_cvtepi16_epi8(r256); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int8_t, - (simde_math_abs(b_.values[i]) >= 8) ? 0 : - (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : - ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshlq_s8 - #define vrshlq_s8(a, b) simde_vrshlq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vrshlq_s16 (const simde_int16x8_t a, const simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshlq_s16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR( signed short) zero = vec_splats(HEDLEY_STATIC_CAST( signed short, 0)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 1)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16 - 8)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) max = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0xFF)); - SIMDE_POWER_ALTIVEC_VECTOR(signed short) a_shr; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) b_abs; - - b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), - vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), - ff); - a_shr = vec_sra(a, vec_sub(b_abs, ones)); - return vec_and(vec_sel(vec_sl(a, b_abs), - vec_add(vec_sra(a_shr, ones), vec_and(a_shr, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), ones))), - vec_cmplt(vec_sl(b, shift), zero)), - vec_cmplt(b_abs, max)); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ff = _mm_cmpeq_epi16(zero, zero); - __m128i B = _mm_srai_epi16(_mm_slli_epi16(b_.m128i, 8), 8); - __m128i a_shr = _mm_srav_epi16(a_.m128i, _mm_xor_si128(B, ff)); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi16(a_.m128i, B), - _mm_srai_epi16(_mm_sub_epi16(a_shr, ff), 1), - _mm_cmpgt_epi16(zero, B)); - #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64) - const __m256i zero = _mm256_setzero_si256(); - const __m256i ff = _mm256_cmpeq_epi32(zero, zero); - __m256i a256 = _mm256_cvtepi16_epi32(a_.m128i); - __m256i b256 = _mm256_cvtepi16_epi32(b_.m128i); - b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24); - __m256i a256_shr = _mm256_srav_epi32(a256, _mm256_xor_si256(b256, ff)); - __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), - _mm256_srai_epi32(_mm256_sub_epi32(a256_shr, ff), 1), - _mm256_cmpgt_epi32(zero, b256)); - r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi64x(0x0D0C090805040100)); - r_.m128i = _mm_set_epi64x(simde_mm256_extract_epi64(r256, 2), simde_mm256_extract_epi64(r256, 0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, - (simde_math_abs(b_.values[i]) >= 16) ? 0 : - (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : - ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshlq_s16 - #define vrshlq_s16(a, b) simde_vrshlq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vrshlq_s32 (const simde_int32x4_t a, const simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshlq_s32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR( signed int) zero = vec_splats(HEDLEY_STATIC_CAST( signed int, 0)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 1)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32 - 8)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) max = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0xFF)); - SIMDE_POWER_ALTIVEC_VECTOR(signed int) a_shr; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) b_abs; - - b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), - vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), - ff); - a_shr = vec_sra(a, vec_sub(b_abs, ones)); - return vec_and(vec_sel(vec_sl(a, b_abs), - vec_add(vec_sra(a_shr, ones), vec_and(a_shr, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), ones))), - vec_cmplt(vec_sl(b, shift), zero)), - vec_cmplt(b_abs, max)); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ff = _mm_cmpeq_epi32(zero, zero); - __m128i B = _mm_srai_epi32(_mm_slli_epi32(b_.m128i, 24), 24); - __m128i a_shr = _mm_srav_epi32(a_.m128i, _mm_xor_si128(B, ff)); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi32(a_.m128i, B), - _mm_srai_epi32(_mm_sub_epi32(a_shr, ff), 1), - _mm_cmpgt_epi32(zero, B)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(int32_t, - (simde_math_abs(b_.values[i]) >= 32) ? 0 : - (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : - ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshlq_s32 - #define vrshlq_s32(a, b) simde_vrshlq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vrshlq_s64 (const simde_int64x2_t a, const simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshlq_s64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR( signed long long) zero = vec_splats(HEDLEY_STATIC_CAST( signed long long, 0)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 1)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64 - 8)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) max = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0xFF)); - SIMDE_POWER_ALTIVEC_VECTOR(signed long long) a_shr; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) b_abs; - - b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), - vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), - ff); - a_shr = vec_sra(a, vec_sub(b_abs, ones)); - - HEDLEY_DIAGNOSTIC_PUSH - #if defined(SIMDE_BUG_CLANG_46770) - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif - return vec_and(vec_sel(vec_sl(a, b_abs), - vec_add(vec_sra(a_shr, ones), vec_and(a_shr, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), ones))), - vec_cmplt(vec_sl(b, shift), zero)), - vec_cmplt(b_abs, max)); - HEDLEY_DIAGNOSTIC_POP - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ff = _mm_cmpeq_epi32(zero, zero); - __m128i B = _mm_srai_epi64(_mm_slli_epi64(b_.m128i, 56), 56); - __m128i a_shr = _mm_srav_epi64(a_.m128i, _mm_xor_si128(B, ff)); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, B), - _mm_srai_epi64(_mm_sub_epi64(a_shr, ff), 1), - _mm_cmpgt_epi64(zero, B)); - #elif defined(SIMDE_X86_AVX2_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ones = _mm_set1_epi64x(1); - __m128i maska = _mm_cmpgt_epi64(zero, a_.m128i); - __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b_.m128i), _mm_set1_epi64x(0xFF)); - __m128i a_rnd = _mm_and_si128(_mm_srlv_epi64(a_.m128i, _mm_sub_epi64(b_abs, ones)), ones); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, b_abs), - _mm_add_epi64(_mm_xor_si128(_mm_srlv_epi64(_mm_xor_si128(a_.m128i, maska), b_abs), maska), a_rnd), - _mm_cmpgt_epi64(zero, _mm_slli_epi64(b_.m128i, 56))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrshld_s64(a_.values[i], b_.values[i]); - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshlq_s64 - #define vrshlq_s64(a, b) simde_vrshlq_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vrshlq_u8 (const simde_uint8x16_t a, const simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshlq_u8(a, b); - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR( signed char) zero = vec_splats(HEDLEY_STATIC_CAST( signed char, 0)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 1)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) max = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 8)); - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) b_abs, b_abs_dec, a_shr; - - b_abs = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_abs(b)); - b_abs_dec = vec_sub(b_abs, ones); - a_shr = vec_and(vec_sr(a, b_abs_dec), vec_cmplt(b_abs_dec, max)); - return vec_sel(vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, max)), - vec_sr(vec_add(a_shr, ones), ones), - vec_cmplt(b, zero)); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a); - simde_int8x16_private b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - const __m256i zero = _mm256_setzero_si256(); - const __m256i ff = _mm256_cmpeq_epi32(zero, zero); - __m256i a256 = _mm256_cvtepu8_epi16(a_.m128i); - __m256i b256 = _mm256_cvtepi8_epi16(b_.m128i); - __m256i a256_shr = _mm256_srlv_epi16(a256, _mm256_xor_si256(b256, ff)); - __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi16(a256, b256), - _mm256_srli_epi16(_mm256_sub_epi16(a256_shr, ff), 1), - _mm256_cmpgt_epi16(zero, b256)); - r_.m128i = _mm256_cvtepi16_epi8(r256); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, - (b_.values[i] >= 8) ? 0 : - (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : - (b_.values[i] >= -8) ? (((b_.values[i] == -8) ? 0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : - 0); - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshlq_u8 - #define vrshlq_u8(a, b) simde_vrshlq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vrshlq_u16 (const simde_uint16x8_t a, const simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshlq_u16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR( signed short) zero = vec_splats(HEDLEY_STATIC_CAST( signed short, 0)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 1)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16 - 8)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) max = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0xFF)); - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) b_abs, b_abs_dec, a_shr; - - b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), - vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), - ff); - b_abs_dec = vec_sub(b_abs, ones); - a_shr = vec_and(vec_sr(a, b_abs_dec), vec_cmplt(b_abs_dec, max)); - return vec_sel(vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, max)), - vec_sr(vec_add(a_shr, ones), ones), - vec_cmplt(vec_sl(b, shift), zero)); - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a); - simde_int16x8_private b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - const __m128i zero = _mm_setzero_si128(); 
- const __m128i ff = _mm_cmpeq_epi16(zero, zero); - __m128i B = _mm_srai_epi16(_mm_slli_epi16(b_.m128i, 8), 8); - __m128i a_shr = _mm_srlv_epi16(a_.m128i, _mm_xor_si128(B, ff)); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi16(a_.m128i, B), - _mm_srli_epi16(_mm_sub_epi16(a_shr, ff), 1), - _mm_cmpgt_epi16(zero, B)); - #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64) - const __m256i zero = _mm256_setzero_si256(); - const __m256i ff = _mm256_cmpeq_epi32(zero, zero); - __m256i a256 = _mm256_cvtepu16_epi32(a_.m128i); - __m256i b256 = _mm256_cvtepi16_epi32(b_.m128i); - b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24); - __m256i a256_shr = _mm256_srlv_epi32(a256, _mm256_xor_si256(b256, ff)); - __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), - _mm256_srli_epi32(_mm256_sub_epi32(a256_shr, ff), 1), - _mm256_cmpgt_epi32(zero, b256)); - r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi64x(0x0D0C090805040100)); - r_.m128i = _mm_set_epi64x(simde_mm256_extract_epi64(r256, 2), simde_mm256_extract_epi64(r256, 0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, - (b_.values[i] >= 16) ? 0 : - (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : - (b_.values[i] >= -16) ? (((b_.values[i] == -16) ? 0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : - 0); - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshlq_u16 - #define vrshlq_u16(a, b) simde_vrshlq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vrshlq_u32 (const simde_uint32x4_t a, const simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshlq_u32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR( signed int) zero = vec_splats(HEDLEY_STATIC_CAST( signed int, 0)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 1)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32 - 8)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) max = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0xFF)); - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) b_abs, b_abs_dec, a_shr; - - b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), - vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), - ff); - b_abs_dec = vec_sub(b_abs, ones); - a_shr = vec_and(vec_sr(a, b_abs_dec), vec_cmplt(b_abs_dec, max)); - return vec_sel(vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, max)), - vec_sr(vec_add(a_shr, ones), ones), - vec_cmplt(vec_sl(b, shift), zero)); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a); - simde_int32x4_private b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ff = _mm_cmpeq_epi32(zero, zero); - __m128i B = _mm_srai_epi32(_mm_slli_epi32(b_.m128i, 24), 24); - __m128i a_shr = _mm_srlv_epi32(a_.m128i, _mm_xor_si128(B, ff)); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi32(a_.m128i, B), - _mm_srli_epi32(_mm_sub_epi32(a_shr, ff), 1), - _mm_cmpgt_epi32(zero, B)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) 
/ sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = - (b_.values[i] >= 32) ? 0 : - (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : - (b_.values[i] >= -32) ? (((b_.values[i] == -32) ? 0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : - 0; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshlq_u32 - #define vrshlq_u32(a, b) simde_vrshlq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vrshlq_u64 (const simde_uint64x2_t a, const simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrshlq_u64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR( signed long long) zero = vec_splats(HEDLEY_STATIC_CAST( signed long long, 0)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 1)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64 - 8)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) max = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0xFF)); - SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) b_abs, b_abs_dec, a_shr; - - b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), - vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), - ff); - b_abs_dec = vec_sub(b_abs, ones); - a_shr = vec_and(vec_sr(a, b_abs_dec), vec_cmplt(b_abs_dec, max)); - HEDLEY_DIAGNOSTIC_PUSH - #if defined(SIMDE_BUG_CLANG_46770) - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif - return vec_sel(vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, max)), - vec_sr(vec_add(a_shr, ones), ones), - vec_cmplt(vec_sl(b, shift), zero)); - HEDLEY_DIAGNOSTIC_POP - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a); - simde_int64x2_private b_ = simde_int64x2_to_private(b); - - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - const __m128i zero = _mm_setzero_si128(); - const __m128i ff = _mm_cmpeq_epi64(zero, zero); - __m128i B = _mm_srai_epi64(_mm_slli_epi64(b_.m128i, 56), 56); - __m128i a_shr = _mm_srlv_epi64(a_.m128i, _mm_xor_si128(B, ff)); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, B), - _mm_srli_epi64(_mm_sub_epi64(a_shr, ff), 1), - _mm_cmpgt_epi64(zero, B)); - #elif defined(SIMDE_X86_AVX2_NATIVE) - const __m128i ones = _mm_set1_epi64x(1); - __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b_.m128i), _mm_set1_epi64x(0xFF)); - __m128i a_shr = _mm_srlv_epi64(a_.m128i, _mm_sub_epi64(b_abs, ones)); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, b_abs), - _mm_srli_epi64(_mm_add_epi64(a_shr, ones), 1), - _mm_cmpgt_epi64(_mm_setzero_si128(), _mm_slli_epi64(b_.m128i, 56))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrshld_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshlq_u64 - #define vrshlq_u64(a, b) simde_vrshlq_u64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RSHL_H) */ -/* :: End simde/arm/neon/rshl.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY 
*/ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rshrn_high_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RSHRN_HIGH_N_H) -#define SIMDE_ARM_NEON_RSHRN_HIGH_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rshrn_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - */ - -#if !defined(SIMDE_ARM_NEON_RSHRN_N_H) -#define SIMDE_ARM_NEON_RSHRN_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrn_n_s16(a, n) vrshrn_n_s16((a), (n)) -#else - #define simde_vrshrn_n_s16(a, n) simde_vmovn_s16(simde_vrshrq_n_s16(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrn_n_s16 - #define vrshrn_n_s16(a, n) simde_vrshrn_n_s16((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrn_n_s32(a, n) vrshrn_n_s32((a), (n)) -#else - #define simde_vrshrn_n_s32(a, n) simde_vmovn_s32(simde_vrshrq_n_s32(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrn_n_s32 - #define vrshrn_n_s32(a, n) simde_vrshrn_n_s32((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrn_n_s64(a, n) vrshrn_n_s64((a), (n)) -#else - #define simde_vrshrn_n_s64(a, n) simde_vmovn_s64(simde_vrshrq_n_s64(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrn_n_s64 - #define vrshrn_n_s64(a, n) simde_vrshrn_n_s64((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrn_n_u16(a, n) vrshrn_n_u16((a), (n)) -#else - #define simde_vrshrn_n_u16(a, n) simde_vmovn_u16(simde_vrshrq_n_u16(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrn_n_u16 - #define vrshrn_n_u16(a, n) simde_vrshrn_n_u16((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrn_n_u32(a, n) vrshrn_n_u32((a), (n)) -#else - #define simde_vrshrn_n_u32(a, n) simde_vmovn_u32(simde_vrshrq_n_u32(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrn_n_u32 - #define vrshrn_n_u32(a, n) simde_vrshrn_n_u32((a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vrshrn_n_u64(a, n) vrshrn_n_u64((a), (n)) -#else - #define simde_vrshrn_n_u64(a, n) simde_vmovn_u64(simde_vrshrq_n_u64(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrshrn_n_u64 - #define vrshrn_n_u64(a, n) simde_vrshrn_n_u64((a), (n)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RSHRN_N_H) */ -/* :: End simde/arm/neon/rshrn_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vrshrn_high_n_s16(r, a, n) vrshrn_high_n_s16((r), (a), (n)) -#else - #define simde_vrshrn_high_n_s16(r, a, n) simde_vcombine_s8(r, simde_vrshrn_n_s16(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrshrn_high_n_s16 - #define vrshrn_high_n_s16(r, a, n) simde_vrshrn_high_n_s16((r), (a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vrshrn_high_n_s32(r, a, n) vrshrn_high_n_s32((r), (a), (n)) -#else - #define simde_vrshrn_high_n_s32(r, a, n) 
simde_vcombine_s16(r, simde_vrshrn_n_s32(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrshrn_high_n_s32 - #define vrshrn_high_n_s32(r, a, n) simde_vrshrn_high_n_s32((r), (a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vrshrn_high_n_s64(r, a, n) vrshrn_high_n_s64((r), (a), (n)) -#else - #define simde_vrshrn_high_n_s64(r, a, n) simde_vcombine_s32(r, simde_vrshrn_n_s64(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrshrn_high_n_s64 - #define vrshrn_high_n_s64(r, a, n) simde_vrshrn_high_n_s64((r), (a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vrshrn_high_n_u16(r, a, n) vrshrn_high_n_u16((r), (a), (n)) -#else - #define simde_vrshrn_high_n_u16(r, a, n) simde_vcombine_u8(r, simde_vrshrn_n_u16(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrshrn_high_n_u16 - #define vrshrn_high_n_u16(r, a, n) simde_vrshrn_high_n_u16((r), (a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vrshrn_high_n_u32(r, a, n) vrshrn_high_n_u32((r), (a), (n)) -#else - #define simde_vrshrn_high_n_u32(r, a, n) simde_vcombine_u16(r, simde_vrshrn_n_u32(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrshrn_high_n_u32 - #define vrshrn_high_n_u32(r, a, n) simde_vrshrn_high_n_u32((r), (a), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vrshrn_high_n_u64(r, a, n) vrshrn_high_n_u64((r), (a), (n)) -#else - #define simde_vrshrn_high_n_u64(r, a, n) simde_vcombine_u32(r, simde_vrshrn_n_u64(a, n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrshrn_high_n_u64 - #define vrshrn_high_n_u64(r, a, n) simde_vrshrn_high_n_u64((r), (a), (n)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_RSHRN_HIGH_N_H) */ -/* :: End simde/arm/neon/rshrn_high_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/rsqrte.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_RSQRTE_H) -#define SIMDE_ARM_NEON_RSQRTE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vrsqrteh_f16(simde_float16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrsqrteh_f16(a); - #else - #if defined(simde_math_sqrtf) - simde_float32_t r_; - simde_float32_t a_ = simde_float16_to_float32(a); - r_ = 1.0f / simde_math_sqrtf(a_); - return simde_float16_from_float32(r_); - #else - HEDLEY_UNREACHABLE(); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrsqrteh_f16 - #define vrsqrteh_f16(a) simde_vrsqrteh_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32_t -simde_vrsqrtes_f32(simde_float32_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrsqrtes_f32(a); - #else - #if defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - #if SIMDE_ACCURACY_PREFERENCE <= 0 - return (INT32_C(0x5F37624F) - (a >> 1)); - #else - simde_float32 x = a; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - return x; - #endif - #elif defined(simde_math_sqrtf) - return 1.0f / simde_math_sqrtf(a); - #else - HEDLEY_UNREACHABLE(); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrsqrtes_f32 - #define vrsqrtes_f32(a) simde_vrsqrtes_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64_t -simde_vrsqrted_f64(simde_float64_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrsqrted_f64(a); - #else - #if defined(SIMDE_IEEE754_STORAGE) - //https://www.mdpi.com/1099-4300/23/1/86/htm - simde_float64_t x = a; - simde_float64_t xhalf = SIMDE_FLOAT64_C(0.5) * x; - int64_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - ix = INT64_C(0x5FE6ED2102DCBFDA) - (ix >> 1); - simde_memcpy(&x, &ix, sizeof(x)); - x = x * (SIMDE_FLOAT64_C(1.50087895511633457) - xhalf * x * x); - x = x * (SIMDE_FLOAT64_C(1.50000057967625766) - xhalf * x * x); - return x; - #elif defined(simde_math_sqrtf) - return SIMDE_FLOAT64_C(1.0) / simde_math_sqrt(a_.values[i]); - #else - HEDLEY_UNREACHABLE(); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vrsqrted_f64 - #define vrsqrted_f64(a) simde_vrsqrted_f64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vrsqrte_u32(simde_uint32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrsqrte_u32(a); - #else - simde_uint32x2_private - a_ = simde_uint32x2_to_private(a), - r_; - - for(size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[i])) ; i++) { - if (a_.values[i] < 0x3FFFFFFF) { - r_.values[i] = UINT32_MAX; - } else { - uint32_t a_temp = (a_.values[i] >> 23) & 511; - if (a_temp < 256) { - a_temp = a_temp * 2 + 1; - } else { - a_temp = (a_temp >> 1) << 1; - 
a_temp = (a_temp + 1) * 2; - } - uint32_t b = 512; - while((a_temp * (b + 1) * (b + 1)) < (1 << 28)) - b = b + 1; - r_.values[i] = (b + 1) / 2; - r_.values[i] = r_.values[i] << 23; - } - } - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrsqrte_u32 - #define vrsqrte_u32(a) simde_vrsqrte_u32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vrsqrte_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vrsqrte_f16(a); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a); - - #if defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vrsqrteh_f16(a_.values[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vrsqrte_f16 - #define vrsqrte_f16(a) simde_vrsqrte_f16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vrsqrte_f32(simde_float32x2_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vrsqrte_f32(a); - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a); - - #if defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.values[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.values[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vrsqrte_f32 - #define vrsqrte_f32(a) simde_vrsqrte_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64x1_t -simde_vrsqrte_f64(simde_float64x1_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vrsqrte_f64(a); - #else - simde_float64x1_private - r_, - a_ = simde_float64x1_to_private(a); - - #if defined(SIMDE_IEEE754_STORAGE) - //https://www.mdpi.com/1099-4300/23/1/86/htm - SIMDE_VECTORIZE - for(size_t i = 0 ; i < (sizeof(r_.values)/sizeof(r_.values[0])) ; i++) { - simde_float64_t x = a_.values[i]; - simde_float64_t xhalf = SIMDE_FLOAT64_C(0.5) * x; - int64_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - ix = INT64_C(0x5FE6ED2102DCBFDA) - (ix >> 1); - simde_memcpy(&x, &ix, sizeof(x)); - x = x * (SIMDE_FLOAT64_C(1.50087895511633457) - xhalf * x * x); - x = x * (SIMDE_FLOAT64_C(1.50000057967625766) - xhalf * x * x); - r_.values[i] = x; - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = SIMDE_FLOAT64_C(1.0) / 
[... deletion of vendored SIMDe NEON portability headers continues: remainder of simde/arm/neon/rsqrte.h, then rsqrts.h, rsra_n.h, rsubhn.h, rsubhn_high.h, set_lane.h, sha1.h, sha256.h, and the opening of sha512.h; the auto-generated, MIT-licensed upstream contents (SIMDe revision 589c7d599ae2213823acc4334a3ae8ef8caefe18) are omitted here ...]
^ (y_.values[1] & y_.values[0]); - r_.values[1] = (r_.values[1] + Msigma0 + w_.values[1]); - Msigma0 = ROR64(r_.values[1], 28) ^ ROR64(r_.values[1], 34) ^ ROR64(r_.values[1], 39); - r_.values[0] = (r_.values[1] & y_.values[0]) ^ (r_.values[1] & y_.values[1]) ^ (y_.values[1] & y_.values[0]); - r_.values[0] = (r_.values[0] + Msigma0 + w_.values[0]); - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsha512h2q_u64 - #define vsha512h2q_u64(w, x, y) simde_vsha512h2q_u64((w), (x), (y)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vsha512su0q_u64(simde_uint64x2_t w, simde_uint64x2_t x) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA512) - return vsha512su0q_u64(w, x); - #else - simde_uint64x2_private - r_, - w_ = simde_uint64x2_to_private(w), - x_ = simde_uint64x2_to_private(x); - uint64_t sig0; - sig0 = ROR64(w_.values[1], 1) ^ ROR64(w_.values[1], 8) ^ (w_.values[1] >> 7); - r_.values[0] = w_.values[0] + sig0; - sig0 = ROR64(x_.values[0], 1) ^ ROR64(x_.values[0], 8) ^ (x_.values[0] >> 7); - r_.values[1] = w_.values[1] + sig0; - return simde_uint64x2_from_private(r_); - - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsha512su0q_u64 - #define vsha512su0q_u64(w, x) simde_vsha512su0q_u64((w), (x)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vsha512su1q_u64(simde_uint64x2_t w, simde_uint64x2_t x, simde_uint64x2_t y) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA512) - return vsha512su1q_u64(w, x, y); - #else - simde_uint64x2_private - r_, - w_ = simde_uint64x2_to_private(w), - x_ = simde_uint64x2_to_private(x), - y_ = simde_uint64x2_to_private(y); - uint64_t sig1; - sig1 = ROR64(x_.values[1], 19) ^ ROR64(x_.values[1], 61) ^ (x_.values[1] >> 6); - r_.values[1] = w_.values[1] + sig1 + y_.values[1]; - sig1 = ROR64(x_.values[0], 19) ^ ROR64(x_.values[0], 61) ^ (x_.values[0] >> 6); - r_.values[0] = w_.values[0] + sig1 + y_.values[0]; - return simde_uint64x2_from_private(r_); - - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsha512su1q_u64 - #define vsha512su1q_u64(w, x, y) simde_vsha512su1q_u64((w), (x), (y)) -#endif - -#undef ROR64 -#undef ROL64 -#undef LSR -#undef LSL - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_SHA512_H) */ -/* :: End simde/arm/neon/sha512.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/shl.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_ARM_NEON_SHL_H) -#define SIMDE_ARM_NEON_SHL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Notes from the implementer (Christopher Moore aka rosbif) - * - * I have tried to exactly reproduce the documented behaviour of the - * ARM NEON shl and shlq intrinsics. - * This is complicated for the following reasons:- - * - * a) Negative shift counts shift right. - * - * b) Only the low byte of the shift count is used but the shift count - * is not limited to 8-bit values (-128 to 127). - * - * c) Intel SIMD is not nearly as complete as NEON and AltiVec. - * There were no intrisics with a vector shift count before AVX2 which - * only has 32 and 64-bit logical ones and only a 32-bit arithmetic - * one. The others need AVX512. There are no 8-bit shift intrinsics at - * all, even with a scalar shift count. It is surprising to use AVX2 - * and even AVX512 to implement a 64-bit vector operation. - * - * d) Many shift implementations, and the C standard, do not treat a - * shift count >= the object's size in bits as one would expect. - * (Personally I feel that > is silly but == can be useful.) - * - * Maybe it would be useful for SIMDe to have a flag enabling a fast - * implementation where the result is only guaranteed for shift counts - * conforming to the C standard. - * - * Note that even the C17/18 standard does not define the behaviour of - * a right shift of a negative value. - * However Evan and I agree that all compilers likely to be used - * implement this as an arithmetic right shift with sign extension. - * If this is not the case it could be replaced by a logical right shift - * if negative values are complemented before and after the shift. - * - * Some of the SIMD translations may be slower than the portable code, - * particularly those for vectors with only one or two elements. - * But I had fun writing them ;-) - * - */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_vshld_s64 (const int64_t a, const int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vshld_s64(a, b); - #else - int8_t b_ = HEDLEY_STATIC_CAST(int8_t, b); - return - (b_ >= 0) - ? (b_ >= 64) - ? 0 - : (a << b_) - : (b_ <= -64) - ? (a >> 63) - : (a >> -b_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vshld_s64 - #define vshld_s64(a, b) simde_vshld_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_vshld_u64 (const uint64_t a, const int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vshld_u64(a, HEDLEY_STATIC_CAST(int64_t, b)); - #else - int8_t b_ = HEDLEY_STATIC_CAST(int8_t, b); - return - (simde_math_llabs(b_) >= 64) - ? 0 - : (b_ >= 0) - ? 
(a << b_) - : (a >> -b_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vshld_u64 - #define vshld_u64(a, b) simde_vshld_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vshl_s8 (const simde_int8x8_t a, const simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshl_s8(a, b); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - __m128i a128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(a_.m64)); - __m128i b128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(b_.m64)); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi16(a128, b128), - _mm_srav_epi16(a128, _mm_abs_epi16(b128)), - _mm_cmpgt_epi16(_mm_setzero_si128(), b128)); - r_.m64 = _mm_movepi64_pi64(_mm_cvtepi16_epi8(r128)); - #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - __m256i a256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(a_.m64)); - __m256i b256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(b_.m64)); - __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), - _mm256_srav_epi32(a256, _mm256_abs_epi32(b256)), - _mm256_cmpgt_epi32(_mm256_setzero_si256(), b256)); - r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi32(0x0C080400)); - r_.m64 = _mm_set_pi32(simde_mm256_extract_epi32(r256, 4), simde_mm256_extract_epi32(r256, 0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int8_t, - (b_.values[i] >= 0) ? - (b_.values[i] >= 8) ? 0 : (a_.values[i] << b_.values[i]) : - (b_.values[i] <= -8) ? (a_.values[i] >> 7) : (a_.values[i] >> -b_.values[i])); - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_s8 - #define vshl_s8(a, b) simde_vshl_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vshl_s16 (const simde_int16x4_t a, const simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshl_s16(a, b); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = simde_int16x4_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - __m128i a128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(a_.m64)); - __m128i b128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(b_.m64)); - b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), - _mm_srav_epi32(a128, _mm_abs_epi32(b128)), - _mm_cmpgt_epi32(_mm_setzero_si128(), b128)); - r_.m64 = _mm_movepi64_pi64(_mm_shuffle_epi8(r128, _mm_set1_epi64x(0x0D0C090805040100))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, - (b_.values[i] >= 0) ? - (b_.values[i] >= 16) ? 0 : (a_.values[i] << b_.values[i]) : - (b_.values[i] <= -16) ? 
(a_.values[i] >> 15) : (a_.values[i] >> -b_.values[i])); - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_s16 - #define vshl_s16(a, b) simde_vshl_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vshl_s32 (const simde_int32x2_t a, const simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshl_s32(a, b); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - __m128i a128 = _mm_movpi64_epi64(a_.m64); - __m128i b128 = _mm_movpi64_epi64(b_.m64); - b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), - _mm_srav_epi32(a128, _mm_abs_epi32(b128)), - _mm_cmpgt_epi32(_mm_setzero_si128(), b128)); - r_.m64 = _mm_movepi64_pi64(r128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = - (b_.values[i] >= 0) ? - (b_.values[i] >= 32) ? 0 : (a_.values[i] << b_.values[i]) : - (b_.values[i] <= -32) ? (a_.values[i] >> 31) : (a_.values[i] >> -b_.values[i]); - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_s32 - #define vshl_s32(a, b) simde_vshl_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vshl_s64 (const simde_int64x1_t a, const simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshl_s64(a, b); - #else - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a), - b_ = simde_int64x1_to_private(b); - - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - __m128i zero = _mm_setzero_si128(); - __m128i a128 = _mm_movpi64_epi64(a_.m64); - __m128i b128 = _mm_movpi64_epi64(b_.m64); - b128 = _mm_srai_epi64(_mm_slli_epi64(b128, 56), 56); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128), - _mm_srav_epi64(a128, _mm_sub_epi64(zero, b128)), - _mm_cmpgt_epi64(zero, b128)); - r_.m64 = _mm_movepi64_pi64(r128); - #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - __m128i zero = _mm_setzero_si128(); - __m128i a128 = _mm_movpi64_epi64(a_.m64); - __m128i b128 = _mm_movpi64_epi64(b_.m64); - __m128i maska = _mm_cmpgt_epi64(zero, a128); - __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b128), _mm_set1_epi64x(0xFF)); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b_abs), - _mm_xor_si128(_mm_srlv_epi64(_mm_xor_si128(a128, maska), b_abs), maska), - _mm_cmpgt_epi64(zero, _mm_slli_epi64(b128, 56))); - r_.m64 = _mm_movepi64_pi64(r128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vshld_s64(a_.values[i], b_.values[i]); - } - #endif - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_s64 - #define vshl_s64(a, b) simde_vshl_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vshl_u8 (const simde_uint8x8_t a, const simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshl_u8(a, b); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a); - simde_int8x8_private b_ = simde_int8x8_to_private(b); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && 
defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - __m128i a128 = _mm_cvtepu8_epi16(_mm_movpi64_epi64(a_.m64)); - __m128i b128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(b_.m64)); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi16(a128, b128), - _mm_srlv_epi16(a128, _mm_abs_epi16(b128)), - _mm_cmpgt_epi16(_mm_setzero_si128(), b128)); - r_.m64 = _mm_movepi64_pi64(_mm_cvtepi16_epi8(r128)); - #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - __m256i a256 = _mm256_cvtepu8_epi32(_mm_movpi64_epi64(a_.m64)); - __m256i b256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(b_.m64)); - __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), - _mm256_srlv_epi32(a256, _mm256_abs_epi32(b256)), - _mm256_cmpgt_epi32(_mm256_setzero_si256(), b256)); - r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi32(0x0C080400)); - r_.m64 = _mm_set_pi32(simde_mm256_extract_epi32(r256, 4), simde_mm256_extract_epi32(r256, 0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, - (simde_math_abs(b_.values[i]) >= 8) ? 0 : - (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : - (a_.values[i] >> -b_.values[i])); - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_u8 - #define vshl_u8(a, b) simde_vshl_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vshl_u16 (const simde_uint16x4_t a, const simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshl_u16(a, b); - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a); - simde_int16x4_private b_ = simde_int16x4_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - __m128i a128 = _mm_cvtepu16_epi32(_mm_movpi64_epi64(a_.m64)); - __m128i b128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(b_.m64)); - b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), - _mm_srlv_epi32(a128, _mm_abs_epi32(b128)), - _mm_cmpgt_epi32(_mm_setzero_si128(), b128)); - r_.m64 = _mm_movepi64_pi64(_mm_shuffle_epi8(r128, _mm_set1_epi64x(0x0D0C090805040100))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, - (simde_math_abs(b_.values[i]) >= 16) ? 0 : - (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : - (a_.values[i] >> -b_.values[i])); - } - #endif - - return simde_uint16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_u16 - #define vshl_u16(a, b) simde_vshl_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vshl_u32 (const simde_uint32x2_t a, const simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshl_u32(a, b); - #else - simde_uint32x2_private - r_, - a_ = simde_uint32x2_to_private(a); - simde_int32x2_private b_ = simde_int32x2_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - __m128i a128 = _mm_movpi64_epi64(a_.m64); - __m128i b128 = _mm_movpi64_epi64(b_.m64); - b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), - _mm_srlv_epi32(a128, _mm_abs_epi32(b128)), - _mm_cmpgt_epi32(_mm_setzero_si128(), b128)); - r_.m64 = _mm_movepi64_pi64(r128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = - (simde_math_abs(b_.values[i]) >= 32) ? 0 : - (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : - (a_.values[i] >> -b_.values[i]); - } - #endif - - return simde_uint32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_u32 - #define vshl_u32(a, b) simde_vshl_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x1_t -simde_vshl_u64 (const simde_uint64x1_t a, const simde_int64x1_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshl_u64(a, b); - #else - simde_uint64x1_private - r_, - a_ = simde_uint64x1_to_private(a); - simde_int64x1_private b_ = simde_int64x1_to_private(b); - - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - __m128i zero = _mm_setzero_si128(); - __m128i a128 = _mm_movpi64_epi64(a_.m64); - __m128i b128 = _mm_movpi64_epi64(b_.m64); - b128 = _mm_srai_epi64(_mm_slli_epi64(b128, 56), 56); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128), - _mm_srlv_epi64(a128, _mm_sub_epi64(zero, b128)), - _mm_cmpgt_epi64(zero, b128)); - r_.m64 = _mm_movepi64_pi64(r128); - #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - __m128i a128 = _mm_movpi64_epi64(a_.m64); - __m128i b128 = _mm_movpi64_epi64(b_.m64); - __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b128), _mm_set1_epi64x(0xFF)); - __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b_abs), - _mm_srlv_epi64(a128, b_abs), - _mm_cmpgt_epi64(_mm_setzero_si128(), _mm_slli_epi64(b128, 56))); - r_.m64 = _mm_movepi64_pi64(r128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vshld_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x1_from_private(r_); -#endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshl_u64 - #define vshl_u64(a, b) simde_vshl_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vshlq_s8 (const simde_int8x16_t a, const simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshlq_s8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed char) a_shl, a_shr; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) b_abs, b_max; - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) b_mask; - b_abs = 
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_abs(b)); - b_max = vec_splat_u8(7); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - a_shl = vec_and(vec_sl(a, b_abs), vec_cmple(b_abs, b_max)); - #else - a_shl = vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, vec_splat_u8(8))); - #endif - a_shr = vec_sra(a, vec_min(b_abs, b_max)); - b_mask = vec_cmplt(b, vec_splat_s8(0)); - return vec_sel(a_shl, a_shr, b_mask); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a), - b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - __m256i a256 = _mm256_cvtepi8_epi16(a_.m128i); - __m256i b256 = _mm256_cvtepi8_epi16(b_.m128i); - __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi16(a256, b256), - _mm256_srav_epi16(a256, _mm256_abs_epi16(b256)), - _mm256_cmpgt_epi16(_mm256_setzero_si256(), b256)); - r_.m128i = _mm256_cvtepi16_epi8(r256); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int8_t, - (b_.values[i] >= 0) ? - (b_.values[i] >= 8) ? 0 : (a_.values[i] << b_.values[i]) : - (b_.values[i] <= -8) ? (a_.values[i] >> 7) : (a_.values[i] >> -b_.values[i])); - } - #endif - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_s8 - #define vshlq_s8(a, b) simde_vshlq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vshlq_s16 (const simde_int16x8_t a, const simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshlq_s16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed short) a_shl, a_shr; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) b_abs, b_max; - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL short) b_mask; - b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), - vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), - vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0xFF))); - b_max = vec_splat_u16(15); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - a_shl = vec_and(vec_sl(a, b_abs), vec_cmple(b_abs, b_max)); - #else - a_shl = vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16)))); - #endif - a_shr = vec_sra(a, vec_min(b_abs, b_max)); - b_mask = vec_cmplt(vec_sl(b, vec_splat_u16(8)), vec_splat_s16(0)); - return vec_sel(a_shl, a_shr, b_mask); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a), - b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i B = _mm_srai_epi16(_mm_slli_epi16(b_.m128i, 8), 8); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi16(a_.m128i, B), - _mm_srav_epi16(a_.m128i, _mm_abs_epi16(B)), - _mm_cmpgt_epi16(_mm_setzero_si128(), B)); - #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64) - __m256i a256 = _mm256_cvtepi16_epi32(a_.m128i); - __m256i b256 = _mm256_cvtepi16_epi32(b_.m128i); - b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24); - __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), - _mm256_srav_epi32(a256, _mm256_abs_epi32(b256)), - _mm256_cmpgt_epi32(_mm256_setzero_si256(), b256)); - r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi64x(0x0D0C090805040100)); - r_.m128i = _mm_set_epi64x(simde_mm256_extract_epi64(r256, 2), simde_mm256_extract_epi64(r256, 0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, - (b_.values[i] >= 0) ? - (b_.values[i] >= 16) ? 0 : (a_.values[i] << b_.values[i]) : - (b_.values[i] <= -16) ? (a_.values[i] >> 15) : (a_.values[i] >> -b_.values[i])); - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_s16 - #define vshlq_s16(a, b) simde_vshlq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vshlq_s32 (const simde_int32x4_t a, const simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshlq_s32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed int) a_shl, a_shr; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) b_abs, b_max; - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) b_mask; - b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), - vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), - vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0xFF))); - b_max = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 31)); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - a_shl = vec_and(vec_sl(a, b_abs), vec_cmple(b_abs, b_max)); - #else - a_shl = vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)))); - #endif - a_shr = vec_sra(a, vec_min(b_abs, b_max)); - b_mask = vec_cmplt(vec_sl(b, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 24))), - vec_splat_s32(0)); - return vec_sel(a_shl, a_shr, b_mask); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a), - b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - __m128i B = _mm_srai_epi32(_mm_slli_epi32(b_.m128i, 24), 24); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi32(a_.m128i, B), - _mm_srav_epi32(a_.m128i, _mm_abs_epi32(B)), - _mm_cmpgt_epi32(_mm_setzero_si128(), B)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = - (b_.values[i] >= 0) ? - (b_.values[i] >= 32) ? 0 : (a_.values[i] << b_.values[i]) : - (b_.values[i] <= -32) ? 
(a_.values[i] >> 31) : (a_.values[i] >> -b_.values[i]); - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_s32 - #define vshlq_s32(a, b) simde_vshlq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vshlq_s64 (const simde_int64x2_t a, const simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshlq_s64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed long long) a_shl, a_shr; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) b_abs, b_max; - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL long long) b_mask; - b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), - vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), - vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0xFF))); - b_max = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63)); - a_shl = vec_and(vec_sl(a, b_abs), vec_cmple(b_abs, b_max)); - a_shr = vec_sra(a, vec_min(b_abs, b_max)); - b_mask = vec_cmplt(vec_sl(b, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 56))), - vec_splats(HEDLEY_STATIC_CAST(signed long long, 0))); - HEDLEY_DIAGNOSTIC_PUSH - #if defined(SIMDE_BUG_CLANG_46770) - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif - return vec_sel(a_shl, a_shr, b_mask); - HEDLEY_DIAGNOSTIC_POP - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a), - b_ = simde_int64x2_to_private(b); - - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i zero = _mm_setzero_si128(); - __m128i B = _mm_srai_epi64(_mm_slli_epi64(b_.m128i, 56), 56); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, B), - _mm_srav_epi64(a_.m128i, _mm_sub_epi64(zero, B)), - _mm_cmpgt_epi64(zero, B)); - #elif defined(SIMDE_X86_AVX2_NATIVE) - __m128i zero = _mm_setzero_si128(); - __m128i maska = _mm_cmpgt_epi64(zero, a_.m128i); - __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b_.m128i), _mm_set1_epi64x(0xFF)); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, b_abs), - _mm_xor_si128(_mm_srlv_epi64(_mm_xor_si128(a_.m128i, maska), b_abs), maska), - _mm_cmpgt_epi64(zero, _mm_slli_epi64(b_.m128i, 56))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vshld_s64(a_.values[i], b_.values[i]); - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_s64 - #define vshlq_s64(a, b) simde_vshlq_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x16_t -simde_vshlq_u8 (const simde_uint8x16_t a, const simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshlq_u8(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) b_abs; - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) b_mask; - b_abs = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_abs(b)); - b_mask = vec_cmplt(b, vec_splat_s8(0)); - return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), - vec_cmplt(b_abs, vec_splat_u8(8))); - #else - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(a); - simde_int8x16_private b_ = simde_int8x16_to_private(b); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - __m256i a256 = _mm256_cvtepu8_epi16(a_.m128i); - __m256i b256 = _mm256_cvtepi8_epi16(b_.m128i); - __m256i r256 = 
_mm256_blendv_epi8(_mm256_sllv_epi16(a256, b256), - _mm256_srlv_epi16(a256, _mm256_abs_epi16(b256)), - _mm256_cmpgt_epi16(_mm256_setzero_si256(), b256)); - r_.m128i = _mm256_cvtepi16_epi8(r256); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, - (simde_math_abs(b_.values[i]) >= 8) ? 0 : - (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : - (a_.values[i] >> -b_.values[i])); - } - #endif - - return simde_uint8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_u8 - #define vshlq_u8(a, b) simde_vshlq_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vshlq_u16 (const simde_uint16x8_t a, const simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshlq_u16(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) b_abs; - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL short) b_mask; - b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), - vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), - vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0xFF))); - b_mask = vec_cmplt(vec_sl(b, vec_splat_u16(8)), vec_splat_s16(0)); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), - vec_cmple(b_abs, vec_splat_u16(15))); - #else - return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), - vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16)))); - #endif - #else - simde_uint16x8_private - r_, - a_ = simde_uint16x8_to_private(a); - simde_int16x8_private b_ = simde_int16x8_to_private(b); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i B = _mm_srai_epi16(_mm_slli_epi16(b_.m128i, 8), 8); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi16(a_.m128i, B), - _mm_srlv_epi16(a_.m128i, _mm_abs_epi16(B)), - _mm_cmpgt_epi16(_mm_setzero_si128(), B)); - #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64) - __m256i a256 = _mm256_cvtepu16_epi32(a_.m128i); - __m256i b256 = _mm256_cvtepi16_epi32(b_.m128i); - b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24); - __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), - _mm256_srlv_epi32(a256, _mm256_abs_epi32(b256)), - _mm256_cmpgt_epi32(_mm256_setzero_si256(), b256)); - r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi64x(0x0D0C090805040100)); - r_.m128i = _mm_set_epi64x(simde_mm256_extract_epi64(r256, 2), simde_mm256_extract_epi64(r256, 0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, - (simde_math_abs(b_.values[i]) >= 16) ? 0 : - (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : - (a_.values[i] >> -b_.values[i])); - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_u16 - #define vshlq_u16(a, b) simde_vshlq_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vshlq_u32 (const simde_uint32x4_t a, const simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshlq_u32(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) b_abs; - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) b_mask; - b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), - vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), - vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0xFF))); - b_mask = vec_cmplt(vec_sl(b, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 24))), vec_splat_s32(0)); - return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), - vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)))); - #else - simde_uint32x4_private - r_, - a_ = simde_uint32x4_to_private(a); - simde_int32x4_private b_ = simde_int32x4_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - __m128i B = _mm_srai_epi32(_mm_slli_epi32(b_.m128i, 24), 24); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi32(a_.m128i, B), - _mm_srlv_epi32(a_.m128i, _mm_abs_epi32(B)), - _mm_cmpgt_epi32(_mm_setzero_si128(), B)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); - r_.values[i] = (simde_math_abs(b_.values[i]) >= 32) ? 0 : - (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : - (a_.values[i] >> -b_.values[i]); - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_u32 - #define vshlq_u32(a, b) simde_vshlq_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vshlq_u64 (const simde_uint64x2_t a, const simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vshlq_u64(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) b_abs; - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL long long) b_mask; - b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), - vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), - vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0xFF))); - b_mask = vec_cmplt(vec_sl(b, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 56))), - vec_splats(HEDLEY_STATIC_CAST(signed long long, 0))); - HEDLEY_DIAGNOSTIC_PUSH - #if defined(SIMDE_BUG_CLANG_46770) - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif - return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), - vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64)))); - HEDLEY_DIAGNOSTIC_POP - #else - simde_uint64x2_private - r_, - a_ = simde_uint64x2_to_private(a); - simde_int64x2_private b_ = simde_int64x2_to_private(b); - - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i zero = _mm_setzero_si128(); - __m128i B = _mm_srai_epi64(_mm_slli_epi64(b_.m128i, 56), 56); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, B), - _mm_srlv_epi64(a_.m128i, _mm_sub_epi64(zero, B)), - _mm_cmpgt_epi64(zero, B)); - #elif defined(SIMDE_X86_AVX2_NATIVE) - __m128i b_abs = 
_mm_and_si128(_mm_abs_epi8(b_.m128i), _mm_set1_epi64x(0xFF)); - r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, b_abs), - _mm_srlv_epi64(a_.m128i, b_abs), - _mm_cmpgt_epi64(_mm_setzero_si128(), _mm_slli_epi64(b_.m128i, 56))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vshld_u64(a_.values[i], b_.values[i]); - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshlq_u64 - #define vshlq_u64(a, b) simde_vshlq_u64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_SHL_H) */ -/* :: End simde/arm/neon/shl.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/shll_high_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_SHLL_HIGH_N_H) -#define SIMDE_ARM_NEON_SHLL_HIGH_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* - * The constant range requirements for the shift amount *n* looks strange. - * The ARM Neon Intrinsics Reference states that for *_s8, 0 << n << 7. This - * does not match the actual instruction decoding in the ARM Reference manual, - * which states that the shift amount "must be equal to the source element width - * in bits" (ARM DDI 0487F.b C7-1959). So for *_s8 instructions, *n* must be 8, - * for *_s16, it must be 16, and *_s32 must be 32 (similarly for unsigned). 
- */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vshll_high_n_s8 (const simde_int8x16_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 7) { - simde_int16x8_private r_; - simde_int8x16_private a_ = simde_int8x16_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, HEDLEY_STATIC_CAST(int16_t, a_.values[i+(sizeof(r_.values) / sizeof(r_.values[0]))]) << n); - } - - return simde_int16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vshll_high_n_s8(a, n) vshll_high_n_s8((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vshll_high_n_s8 - #define vshll_high_n_s8(a, n) simde_vshll_high_n_s8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vshll_high_n_s16 (const simde_int16x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 15) { - simde_int32x4_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i+(sizeof(r_.values) / sizeof(r_.values[0]))]) << n; - } - - return simde_int32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vshll_high_n_s16(a, n) vshll_high_n_s16((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vshll_high_n_s16 - #define vshll_high_n_s16(a, n) simde_vshll_high_n_s16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vshll_high_n_s32 (const simde_int32x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 31) { - simde_int64x2_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i+(sizeof(r_.values) / sizeof(r_.values[0]))]) << n; - } - - return simde_int64x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vshll_high_n_s32(a, n) vshll_high_n_s32((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vshll_high_n_s32 - #define vshll_high_n_s32(a, n) simde_vshll_high_n_s32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vshll_high_n_u8 (const simde_uint8x16_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 7) { - simde_uint16x8_private r_; - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint16_t, a_.values[i+(sizeof(r_.values) / sizeof(r_.values[0]))]) << n); - } - - return simde_uint16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vshll_high_n_u8(a, n) vshll_high_n_u8((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vshll_high_n_u8 - #define vshll_high_n_u8(a, n) simde_vshll_high_n_u8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vshll_high_n_u16 (const simde_uint16x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 15) { - simde_uint32x4_private r_; - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - 
r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i+(sizeof(r_.values) / sizeof(r_.values[0]))]) << n; - } - - return simde_uint32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vshll_high_n_u16(a, n) vshll_high_n_u16((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vshll_high_n_u16 - #define vshll_high_n_u16(a, n) simde_vshll_high_n_u16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vshll_high_n_u32 (const simde_uint32x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 31) { - simde_uint64x2_private r_; - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i+(sizeof(r_.values) / sizeof(r_.values[0]))]) << n; - } - - return simde_uint64x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vshll_high_n_u32(a, n) vshll_high_n_u32((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vshll_high_n_u32 - #define vshll_high_n_u32(a, n) simde_vshll_high_n_u32((a), (n)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_SHLL_HIGH_N_H) */ -/* :: End simde/arm/neon/shll_high_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/shll_n.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_ARM_NEON_SHLL_N_H) -#define SIMDE_ARM_NEON_SHLL_N_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* - * The constant range requirements for the shift amount *n* looks strange. - * The ARM Neon Intrinsics Reference states that for *_s8, 0 << n << 7. This - * does not match the actual instruction decoding in the ARM Reference manual, - * which states that the shift amount "must be equal to the source element width - * in bits" (ARM DDI 0487F.b C7-1959). So for *_s8 instructions, *n* must be 8, - * for *_s16, it must be 16, and *_s32 must be 32 (similarly for unsigned). 
- */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vshll_n_s8 (const simde_int8x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 8) { - simde_int16x8_private r_; - simde_int8x8_private a_ = simde_int8x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int16_t, HEDLEY_STATIC_CAST(int16_t, a_.values[i]) << n); - } - - return simde_int16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshll_n_s8(a, n) vshll_n_s8((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshll_n_s8 - #define vshll_n_s8(a, n) simde_vshll_n_s8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vshll_n_s16 (const simde_int16x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 16) { - simde_int32x4_private r_; - simde_int16x4_private a_ = simde_int16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) << n; - } - - return simde_int32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshll_n_s16(a, n) vshll_n_s16((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshll_n_s16 - #define vshll_n_s16(a, n) simde_vshll_n_s16((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vshll_n_s32 (const simde_int32x2_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 32) { - simde_int64x2_private r_; - simde_int32x2_private a_ = simde_int32x2_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) << n; - } - - return simde_int64x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshll_n_s32(a, n) vshll_n_s32((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshll_n_s32 - #define vshll_n_s32(a, n) simde_vshll_n_s32((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vshll_n_u8 (const simde_uint8x8_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 8) { - simde_uint16x8_private r_; - simde_uint8x8_private a_ = simde_uint8x8_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) << n); - } - - return simde_uint16x8_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshll_n_u8(a, n) vshll_n_u8((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshll_n_u8 - #define vshll_n_u8(a, n) simde_vshll_n_u8((a), (n)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vshll_n_u16 (const simde_uint16x4_t a, const int n) - SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 16) { - simde_uint32x4_private r_; - simde_uint16x4_private a_ = simde_uint16x4_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) << n; - } - - return simde_uint32x4_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vshll_n_u16(a, n) vshll_n_u16((a), (n)) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vshll_n_u16 - 
[... vendored SIMDE amalgamated header (auto-generated, MIT-licensed third-party code) deleted along with the rest of the SIMDE dependency; the elided deletion hunks remove the simde/arm/neon sections shll_n.h (end), shrn_n.h, shrn_high_n.h, sli_n.h, sm3.h, sm4.h, sqadd.h, sqrt.h, sra_n.h, and sri_n.h (deletion continues below) ...]
simde_vreinterpret_u16_p16((a)), simde_vreinterpret_u16_p16((b)), (n))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsri_n_p16 - #define vsri_n_p16(a, b, n) simde_vsri_n_p16((a), (b), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define simde_vsri_n_p64(a, b, n) vsri_n_p64((a), (b), (n)) -#else - #define simde_vsri_n_p64(a, b, n) \ - simde_vreinterpret_p64_u64(simde_vsri_n_u64( \ - simde_vreinterpret_u64_p64((a)), simde_vreinterpret_u64_p64((b)), (n))) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vsri_n_p64 - #define vsri_n_p64(a, b, n) simde_vsri_n_p64((a), (b), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vsriq_n_p8(a, b, n) vsriq_n_p8((a), (b), (n)) -#else - #define simde_vsriq_n_p8(a, b, n) \ - simde_vreinterpretq_p8_u8(simde_vsriq_n_u8( \ - simde_vreinterpretq_u8_p8((a)), simde_vreinterpretq_u8_p8((b)), (n))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsriq_n_p8 - #define vsriq_n_p8(a, b, n) simde_vsriq_n_p8((a), (b), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vsriq_n_p16(a, b, n) vsriq_n_p16((a), (b), (n)) -#else - #define simde_vsriq_n_p16(a, b, n) \ - simde_vreinterpretq_p16_u16(simde_vsriq_n_u16( \ - simde_vreinterpretq_u16_p16((a)), simde_vreinterpretq_u16_p16((b)), (n))) -#endif -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsriq_n_p16 - #define vsriq_n_p16(a, b, n) simde_vsriq_n_p16((a), (b), (n)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define simde_vsriq_n_p64(a, b, n) vsriq_n_p64((a), (b), (n)) -#else - #define simde_vsriq_n_p64(a, b, n) \ - simde_vreinterpretq_p64_u64(simde_vsriq_n_u64( \ - simde_vreinterpretq_u64_p64((a)), simde_vreinterpretq_u64_p64((b)), (n))) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vsriq_n_p64 - #define vsriq_n_p64(a, b, n) simde_vsriq_n_p64((a), (b), (n)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_SRI_N_H) */ -/* :: End simde/arm/neon/sri_n.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/st1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ST1_H) -#define SIMDE_ARM_NEON_ST1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_float16x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - vst1_f16(ptr, val); - #else - simde_float16x4_private val_ = simde_float16x4_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_f16 - #define vst1_f16(a, b) simde_vst1_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float32x2_t val) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1_f32(ptr, val); - #else - simde_float32x2_private val_ = simde_float32x2_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_f32 - #define vst1_f32(a, b) simde_vst1_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(1)], simde_float64x1_t val) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1_f64(ptr, val); - #else - simde_float64x1_private val_ = simde_float64x1_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst1_f64 - #define vst1_f64(a, b) simde_vst1_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_int8x8_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s8(ptr, val); - #else - simde_int8x8_private val_ = simde_int8x8_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s8 - #define vst1_s8(a, b) simde_vst1_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s16(ptr, val); - #else - simde_int16x4_private val_ = simde_int16x4_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s16 - #define vst1_s16(a, b) simde_vst1_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int32x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s32(ptr, val); - #else - simde_int32x2_private val_ = simde_int32x2_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s32 - #define vst1_s32(a, b) simde_vst1_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(1)], simde_int64x1_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s64(ptr, val); - #else - simde_int64x1_private val_ = simde_int64x1_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s64 - #define vst1_s64(a, b) simde_vst1_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_uint8x8_t val) 
{ - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_u8(ptr, val); - #else - simde_uint8x8_private val_ = simde_uint8x8_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u8 - #define vst1_u8(a, b) simde_vst1_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint16x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_u16(ptr, val); - #else - simde_uint16x4_private val_ = simde_uint16x4_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u16 - #define vst1_u16(a, b) simde_vst1_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint32x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_u32(ptr, val); - #else - simde_uint32x2_private val_ = simde_uint32x2_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u32 - #define vst1_u32(a, b) simde_vst1_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(1)], simde_uint64x1_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_u64(ptr, val); - #else - simde_uint64x1_private val_ = simde_uint64x1_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u64 - #define vst1_u64(a, b) simde_vst1_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_float16x8_t val) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - vst1q_f16(ptr, val); - #else - simde_float16x8_private val_ = simde_float16x8_to_private(val); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(ptr, val_.v128); - #else - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_f16 - #define vst1q_f16(a, b) simde_vst1q_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(ptr, val); - #else - simde_float32x4_private val_ = simde_float32x4_to_private(val); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(ptr, val_.v128); - #else - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_f32 - #define vst1q_f32(a, b) simde_vst1q_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float64x2_t val) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(ptr, val); - #else - simde_float64x2_private val_ = simde_float64x2_to_private(val); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(ptr, val_.v128); - #else - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_f64 - #define vst1q_f64(a, b) simde_vst1q_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_int8x16_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s8(ptr, val); - #else - simde_int8x16_private val_ = simde_int8x16_to_private(val); - - #if 
defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(ptr, val_.v128); - #else - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s8 - #define vst1q_s8(a, b) simde_vst1q_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_int16x8_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s16(ptr, val); - #else - simde_int16x8_private val_ = simde_int16x8_to_private(val); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(ptr, val_.v128); - #else - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s16 - #define vst1q_s16(a, b) simde_vst1q_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(ptr, val); - #else - simde_int32x4_private val_ = simde_int32x4_to_private(val); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(ptr, val_.v128); - #else - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s32 - #define vst1q_s32(a, b) simde_vst1q_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int64x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(ptr, val); - #else - simde_int64x2_private val_ = simde_int64x2_to_private(val); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(ptr, val_.v128); - #else - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s64 - #define vst1q_s64(a, b) simde_vst1q_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_uint8x16_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_u8(ptr, val); - #else - simde_uint8x16_private val_ = simde_uint8x16_to_private(val); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(ptr, val_.v128); - #else - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u8 - #define vst1q_u8(a, b) simde_vst1q_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_uint16x8_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_u16(ptr, val); - #else - simde_uint16x8_private val_ = simde_uint16x8_to_private(val); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(ptr, val_.v128); - #else - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u16 - #define vst1q_u16(a, b) simde_vst1q_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_u32(ptr, val); - #else - simde_uint32x4_private val_ = simde_uint32x4_to_private(val); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(ptr, val_.v128); - #else - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u32 - #define vst1q_u32(a, b) simde_vst1q_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u64(uint64_t 
ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint64x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_u64(ptr, val); - #else - simde_uint64x2_private val_ = simde_uint64x2_to_private(val); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(ptr, val_.v128); - #else - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u64 - #define vst1q_u64(a, b) simde_vst1q_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_p8(simde_poly8_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_poly8x8_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_p8(ptr, val); - #else - simde_poly8x8_private val_ = simde_poly8x8_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_p8 - #define vst1_p8(a, b) simde_vst1_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_p16(simde_poly16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_poly16x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_p16(ptr, val); - #else - simde_poly16x4_private val_ = simde_poly16x4_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_p16 - #define vst1_p16(a, b) simde_vst1_p16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_p64(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(1)], simde_poly64x1_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - vst1_p64(ptr, val); - #else - simde_poly64x1_private val_ = simde_poly64x1_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1_p64 - #define vst1_p64(a, b) simde_vst1_p64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_p8(simde_poly8_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_poly8x16_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_p8(ptr, val); - #else - simde_poly8x16_private val_ = simde_poly8x16_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_p8 - #define vst1q_p8(a, b) simde_vst1q_p8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_p16(simde_poly16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_poly16x8_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_p16(ptr, val); - #else - simde_poly16x8_private val_ = simde_poly16x8_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_p16 - #define vst1q_p16(a, b) simde_vst1q_p16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_p64(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_poly64x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - vst1q_p64(ptr, val); - #else - simde_poly64x2_private val_ = simde_poly64x2_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_p64 - #define vst1q_p64(a, b) simde_vst1q_p64((a), (b)) -#endif - -#if !defined(SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE) -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vstrq_p128(simde_poly128_t ptr[HEDLEY_ARRAY_PARAM(1)], simde_poly128_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) - vstrq_p128(ptr, val); - #else - simde_memcpy(ptr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vstrq_p128 - #define 
vstrq_p128(a, b) simde_vstrq_p128((a), (b)) -#endif -#endif /* !defined(SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE) */ - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_bfloat16x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - vst1_bf16(ptr, val); - #else - simde_bfloat16x4_private val_ = simde_bfloat16x4_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1_bf16 - #define vst1_bf16(a, b) simde_vst1_bf16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_bfloat16x8_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - vst1q_bf16(ptr, val); - #else - simde_bfloat16x8_private val_ = simde_bfloat16x8_to_private(val); - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_bf16 - #define vst1q_bf16(a, b) simde_vst1q_bf16((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ST1_H) */ -/* :: End simde/arm/neon/st1.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/st1_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ST1_LANE_H) -#define SIMDE_ARM_NEON_ST1_LANE_H -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_f16(simde_float16_t *ptr, simde_float16x4_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - SIMDE_CONSTIFY_4_NO_RESULT_(vst1_lane_f16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_float16x4_private val_ = simde_float16x4_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_f16 - #define vst1_lane_f16(a, b, c) simde_vst1_lane_f16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_f32(simde_float32_t *ptr, simde_float32x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst1_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_float32x2_private val_ = simde_float32x2_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_f32 - #define vst1_lane_f32(a, b, c) simde_vst1_lane_f32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_f64(simde_float64_t *ptr, simde_float64x1_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - (void) lane; - vst1_lane_f64(ptr, val, 0); - #else - simde_float64x1_private val_ = simde_float64x1_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_f64 - #define vst1_lane_f64(a, b, c) simde_vst1_lane_f64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_s8(int8_t *ptr, simde_int8x8_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_NO_RESULT_(vst1_lane_s8, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int8x8_private val_ = simde_int8x8_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_s8 - #define vst1_lane_s8(a, b, c) simde_vst1_lane_s8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_s16(int16_t *ptr, simde_int16x4_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_NO_RESULT_(vst1_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int16x4_private val_ = simde_int16x4_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_s16 - #define vst1_lane_s16(a, b, c) simde_vst1_lane_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_s32(int32_t *ptr, simde_int32x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst1_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int32x2_private val_ = simde_int32x2_to_private(val); - *ptr = val_.values[lane]; - 
#endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_s32 - #define vst1_lane_s32(a, b, c) simde_vst1_lane_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_s64(int64_t *ptr, simde_int64x1_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - (void) lane; - vst1_lane_s64(ptr, val, 0); - #else - simde_int64x1_private val_ = simde_int64x1_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_s64 - #define vst1_lane_s64(a, b, c) simde_vst1_lane_s64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_u8(uint8_t *ptr, simde_uint8x8_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_NO_RESULT_(vst1_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint8x8_private val_ = simde_uint8x8_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_u8 - #define vst1_lane_u8(a, b, c) simde_vst1_lane_u8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_u16(uint16_t *ptr, simde_uint16x4_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_NO_RESULT_(vst1_lane_u16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint16x4_private val_ = simde_uint16x4_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_u16 - #define vst1_lane_u16(a, b, c) simde_vst1_lane_u16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_u32(uint32_t *ptr, simde_uint32x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst1_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint32x2_private val_ = simde_uint32x2_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_u32 - #define vst1_lane_u32(a, b, c) simde_vst1_lane_u32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_u64(uint64_t *ptr, simde_uint64x1_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - (void) lane; - vst1_lane_u64(ptr, val, 0); - #else - simde_uint64x1_private val_ = simde_uint64x1_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_u64 - #define vst1_lane_u64(a, b, c) simde_vst1_lane_u64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_f16(simde_float16_t *ptr, simde_float16x8_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - SIMDE_CONSTIFY_8_NO_RESULT_(vst1q_lane_f16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_float16x8_private val_ = simde_float16x8_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_f16 - #define vst1q_lane_f16(a, b, c) simde_vst1q_lane_f16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_f32(simde_float32_t *ptr, simde_float32x4_t val, const int lane) - 
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_NO_RESULT_(vst1q_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_float32x4_private val_ = simde_float32x4_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_f32 - #define vst1q_lane_f32(a, b, c) simde_vst1q_lane_f32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_f64(simde_float64_t *ptr, simde_float64x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst1q_lane_f64, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_float64x2_private val_ = simde_float64x2_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_f64 - #define vst1q_lane_f64(a, b, c) simde_vst1q_lane_f64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_s8(int8_t *ptr, simde_int8x16_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_16_NO_RESULT_(vst1q_lane_s8, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int8x16_private val_ = simde_int8x16_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_s8 - #define vst1q_lane_s8(a, b, c) simde_vst1q_lane_s8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_s16(int16_t *ptr, simde_int16x8_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_NO_RESULT_(vst1q_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int16x8_private val_ = simde_int16x8_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_s16 - #define vst1q_lane_s16(a, b, c) simde_vst1q_lane_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_s32(int32_t *ptr, simde_int32x4_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_NO_RESULT_(vst1q_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int32x4_private val_ = simde_int32x4_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_s32 - #define vst1q_lane_s32(a, b, c) simde_vst1q_lane_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_s64(int64_t *ptr, simde_int64x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst1q_lane_s64, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int64x2_private val_ = simde_int64x2_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_s64 - #define vst1q_lane_s64(a, b, c) simde_vst1q_lane_s64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_u8(uint8_t *ptr, simde_uint8x16_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_16_NO_RESULT_(vst1q_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint8x16_private val_ = 
simde_uint8x16_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_u8 - #define vst1q_lane_u8(a, b, c) simde_vst1q_lane_u8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_u16(uint16_t *ptr, simde_uint16x8_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_NO_RESULT_(vst1q_lane_u16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint16x8_private val_ = simde_uint16x8_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_u16 - #define vst1q_lane_u16(a, b, c) simde_vst1q_lane_u16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_u32(uint32_t *ptr, simde_uint32x4_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_NO_RESULT_(vst1q_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint32x4_private val_ = simde_uint32x4_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_u32 - #define vst1q_lane_u32(a, b, c) simde_vst1q_lane_u32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_u64(uint64_t *ptr, simde_uint64x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst1q_lane_u64, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint64x2_private val_ = simde_uint64x2_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_u64 - #define vst1q_lane_u64(a, b, c) simde_vst1q_lane_u64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_p8(simde_poly8_t *ptr, simde_poly8x8_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_NO_RESULT_(vst1_lane_p8, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_poly8x8_private val_ = simde_poly8x8_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_p8 - #define vst1_lane_p8(a, b, c) simde_vst1_lane_p8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_p16(simde_poly16_t *ptr, simde_poly16x4_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_NO_RESULT_(vst1_lane_p16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_poly16x4_private val_ = simde_poly16x4_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_p16 - #define vst1_lane_p16(a, b, c) simde_vst1_lane_p16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_p64(simde_poly64_t *ptr, simde_poly64x1_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - (void) lane; - vst1_lane_p64(ptr, val, 0); - #else - simde_poly64x1_private val_ = simde_poly64x1_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_p64 - #define vst1_lane_p64(a, b, c) simde_vst1_lane_p64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void 
-simde_vst1q_lane_p8(simde_poly8_t *ptr, simde_poly8x16_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_16_NO_RESULT_(vst1q_lane_p8, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_poly8x16_private val_ = simde_poly8x16_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_p8 - #define vst1q_lane_p8(a, b, c) simde_vst1q_lane_p8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_p16(simde_poly16_t *ptr, simde_poly16x8_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_NO_RESULT_(vst1q_lane_p16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_poly16x8_private val_ = simde_poly16x8_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_p16 - #define vst1q_lane_p16(a, b, c) simde_vst1q_lane_p16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_p64(simde_poly64_t *ptr, simde_poly64x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst1q_lane_p64, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_poly64x2_private val_ = simde_poly64x2_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_p64 - #define vst1q_lane_p64(a, b, c) simde_vst1q_lane_p64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_lane_bf16(simde_bfloat16_t *ptr, simde_bfloat16x4_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - SIMDE_CONSTIFY_4_NO_RESULT_(vst1_lane_bf16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_bfloat16x4_private val_ = simde_bfloat16x4_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1_lane_bf16 - #define vst1_lane_bf16(a, b, c) simde_vst1_lane_bf16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_lane_bf16(simde_bfloat16_t *ptr, simde_bfloat16x8_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - SIMDE_CONSTIFY_8_NO_RESULT_(vst1q_lane_bf16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_bfloat16x8_private val_ = simde_bfloat16x8_to_private(val); - *ptr = val_.values[lane]; - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_lane_bf16 - #define vst1q_lane_bf16(a, b, c) simde_vst1q_lane_bf16((a), (b), (c)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ST1_LANE_H) */ - -/* :: End simde/arm/neon/st1_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/st1_x2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the 
Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2021 Décio Luiz Gazzoni Filho - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ST1_X2_H) -#define SIMDE_ARM_NEON_ST1_X2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_f16_x2(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_float16x4x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_f16_x2(ptr, val); - #else - simde_float16x4_private val_[2]; - for (size_t i = 0; i < 2; i++) { - val_[i] = simde_float16x4_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_f16_x2 - #define vst1_f16_x2(ptr, val) simde_vst1_f16_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_f32_x2(simde_float32 ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x2x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_f32_x2(ptr, val); - #else - simde_vst1_f32(ptr, val.val[0]); - simde_vst1_f32(ptr+2, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_f32_x2 - #define vst1_f32_x2(ptr, val) simde_vst1_f32_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_f64_x2(simde_float64 ptr[HEDLEY_ARRAY_PARAM(2)], simde_float64x1x2_t val) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1_f64_x2(ptr, val); - #else - simde_vst1_f64(ptr, val.val[0]); - simde_vst1_f64(ptr+1, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst1_f64_x2 - #define vst1_f64_x2(ptr, val) simde_vst1_f64_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s8_x2(int8_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_int8x8x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_s8_x2(ptr, val); - #else - simde_vst1_s8(ptr, val.val[0]); - simde_vst1_s8(ptr+8, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s8_x2 - #define vst1_s8_x2(ptr, val) simde_vst1_s8_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s16_x2(int16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_int16x4x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_s16_x2(ptr, val); - #else - simde_vst1_s16(ptr, val.val[0]); - simde_vst1_s16(ptr+4, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s16_x2 - #define vst1_s16_x2(ptr, val) simde_vst1_s16_x2((ptr), 
(val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s32_x2(int32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x2x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_s32_x2(ptr, val); - #else - simde_vst1_s32(ptr, val.val[0]); - simde_vst1_s32(ptr+2, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s32_x2 - #define vst1_s32_x2(ptr, val) simde_vst1_s32_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s64_x2(int64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int64x1x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_s64_x2(ptr, val); - #else - simde_vst1_s64(ptr, val.val[0]); - simde_vst1_s64(ptr+1, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s64_x2 - #define vst1_s64_x2(ptr, val) simde_vst1_s64_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u8_x2(uint8_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_uint8x8x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_u8_x2(ptr, val); - #else - simde_vst1_u8(ptr, val.val[0]); - simde_vst1_u8(ptr+8, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u8_x2 - #define vst1_u8_x2(ptr, val) simde_vst1_u8_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u16_x2(uint16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_uint16x4x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_u16_x2(ptr, val); - #else - simde_vst1_u16(ptr, val.val[0]); - simde_vst1_u16(ptr+4, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u16_x2 - #define vst1_u16_x2(ptr, val) simde_vst1_u16_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u32_x2(uint32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x2x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_u32_x2(ptr, val); - #else - simde_vst1_u32(ptr, val.val[0]); - simde_vst1_u32(ptr+2, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u32_x2 - #define vst1_u32_x2(ptr, val) simde_vst1_u32_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u64_x2(uint64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint64x1x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_u64_x2(ptr, val); - #else - simde_vst1_u64(ptr, val.val[0]); - simde_vst1_u64(ptr+1, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u64_x2 - #define vst1_u64_x2(ptr, val) simde_vst1_u64_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_p8_x2(simde_poly8_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_poly8x8x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1_p8_x2(ptr, val); - #else - simde_poly8x8_private val_[2]; - for (size_t i = 0; i < 2; i++) { - val_[i] = simde_poly8x8_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_p8_x2 - #define vst1_p8_x2(a, b) simde_vst1_p8_x2((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_p16_x2(simde_poly16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_poly16x4x2_t val) { - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1_p16_x2(ptr, val); - #else - simde_poly16x4_private val_[2]; - for (size_t i = 0; i < 2; i++) { - val_[i] = simde_poly16x4_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_p16_x2 - #define vst1_p16_x2(a, b) simde_vst1_p16_x2((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_p64_x2(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_poly64x1x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1_p64_x2(ptr, val); - #else - simde_poly64x1_private val_[2]; - for (size_t i = 0; i < 2; i++) { - val_[i] = simde_poly64x1_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1_p64_x2 - #define vst1_p64_x2(a, b) simde_vst1_p64_x2((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_bf16_x2(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_bfloat16x4x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - vst1_bf16_x2(ptr, val); - #else - simde_bfloat16x4_private val_[2]; - for (size_t i = 0; i < 2; i++) { - val_[i] = simde_bfloat16x4_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1_bf16_x2 - #define vst1_bf16_x2(a, b) simde_vst1_bf16_x2((a), (b)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ST1_X2_H) */ -/* :: End simde/arm/neon/st1_x2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/st1_x3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2021 Décio Luiz Gazzoni Filho - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ST1_X3_H) -#define SIMDE_ARM_NEON_ST1_X3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_f16_x3(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_float16x4x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - vst1_f16_x3(ptr, val); - #else - simde_float16x4_private val_[3]; - for (size_t i = 0; i < 3; i++) { - val_[i] = simde_float16x4_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_f16_x3 - #define vst1_f16_x3(a, b) simde_vst1_f16_x3((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_f32_x3(simde_float32 ptr[HEDLEY_ARRAY_PARAM(6)], simde_float32x2x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_f32_x3(ptr, val); - #else - simde_vst1_f32(ptr, val.val[0]); - simde_vst1_f32(ptr+2, val.val[1]); - simde_vst1_f32(ptr+4, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_f32_x3 - #define vst1_f32_x3(ptr, val) simde_vst1_f32_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_f64_x3(simde_float64 ptr[HEDLEY_ARRAY_PARAM(3)], simde_float64x1x3_t val) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1_f64_x3(ptr, val); - #else - simde_vst1_f64(ptr, val.val[0]); - simde_vst1_f64(ptr+1, val.val[1]); - simde_vst1_f64(ptr+2, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst1_f64_x3 - #define vst1_f64_x3(ptr, val) simde_vst1_f64_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s8_x3(int8_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_int8x8x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_s8_x3(ptr, val); - #else - simde_vst1_s8(ptr, val.val[0]); - simde_vst1_s8(ptr+8, val.val[1]); - simde_vst1_s8(ptr+16, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s8_x3 - #define vst1_s8_x3(ptr, val) simde_vst1_s8_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s16_x3(int16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_int16x4x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_s16_x3(ptr, val); - #else - simde_vst1_s16(ptr, val.val[0]); - simde_vst1_s16(ptr+4, val.val[1]); - simde_vst1_s16(ptr+8, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s16_x3 - #define vst1_s16_x3(ptr, val) simde_vst1_s16_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s32_x3(int32_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_int32x2x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_s32_x3(ptr, val); - #else - simde_vst1_s32(ptr, val.val[0]); - simde_vst1_s32(ptr+2, val.val[1]); - simde_vst1_s32(ptr+4, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s32_x3 - #define vst1_s32_x3(ptr, val) simde_vst1_s32_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s64_x3(int64_t ptr[HEDLEY_ARRAY_PARAM(3)], 
simde_int64x1x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_s64_x3(ptr, val); - #else - simde_vst1_s64(ptr, val.val[0]); - simde_vst1_s64(ptr+1, val.val[1]); - simde_vst1_s64(ptr+2, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s64_x3 - #define vst1_s64_x3(ptr, val) simde_vst1_s64_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u8_x3(uint8_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_uint8x8x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_u8_x3(ptr, val); - #else - simde_vst1_u8(ptr, val.val[0]); - simde_vst1_u8(ptr+8, val.val[1]); - simde_vst1_u8(ptr+16, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u8_x3 - #define vst1_u8_x3(ptr, val) simde_vst1_u8_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u16_x3(uint16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_uint16x4x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_u16_x3(ptr, val); - #else - simde_vst1_u16(ptr, val.val[0]); - simde_vst1_u16(ptr+4, val.val[1]); - simde_vst1_u16(ptr+8, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u16_x3 - #define vst1_u16_x3(ptr, val) simde_vst1_u16_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u32_x3(uint32_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_uint32x2x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_u32_x3(ptr, val); - #else - simde_vst1_u32(ptr, val.val[0]); - simde_vst1_u32(ptr+2, val.val[1]); - simde_vst1_u32(ptr+4, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u32_x3 - #define vst1_u32_x3(ptr, val) simde_vst1_u32_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u64_x3(uint64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint64x1x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_u64_x3(ptr, val); - #else - simde_vst1_u64(ptr, val.val[0]); - simde_vst1_u64(ptr+1, val.val[1]); - simde_vst1_u64(ptr+2, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u64_x3 - #define vst1_u64_x3(ptr, val) simde_vst1_u64_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_p8_x3(simde_poly8_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_poly8x8x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1_p8_x3(ptr, val); - #else - simde_poly8x8_private val_[3]; - for (size_t i = 0; i < 3; i++) { - val_[i] = simde_poly8x8_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_p8_x3 - #define vst1_p8_x3(a, b) simde_vst1_p8_x3((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_p16_x3(simde_poly16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_poly16x4x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1_p16_x3(ptr, val); - #else - simde_poly16x4_private val_[3]; - for (size_t i = 0; i < 3; i++) { - val_[i] = simde_poly16x4_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_p16_x3 - #define vst1_p16_x3(a, b) simde_vst1_p16_x3((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_p64_x3(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_poly64x1x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1_p64_x3(ptr, val); - #else - simde_poly64x1_private val_[3]; - for (size_t i = 0; i < 3; i++) { - val_[i] = simde_poly64x1_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1_p64_x3 - #define vst1_p64_x3(a, b) simde_vst1_p64_x3((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_bf16_x3(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_bfloat16x4x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - vst1_bf16_x3(ptr, val); - #else - simde_bfloat16x4_private val_[3]; - for (size_t i = 0; i < 3; i++) { - val_[i] = simde_bfloat16x4_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1_bf16_x3 - #define vst1_bf16_x3(a, b) simde_vst1_bf16_x3((a), (b)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ST1_X3_H) */ -/* :: End simde/arm/neon/st1_x3.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/st1_x4.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2021 Décio Luiz Gazzoni Filho - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ST1_X4_H) -#define SIMDE_ARM_NEON_ST1_X4_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_f16_x4(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_float16x4x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - vst1_f16_x4(ptr, val); - #else - simde_float16x4_private val_[4]; - for (size_t i = 0; i < 4; i++) { - val_[i] = simde_float16x4_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_f16_x4 - #define vst1_f16_x4(a, b) simde_vst1_f16_x4((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_f32_x4(simde_float32 ptr[HEDLEY_ARRAY_PARAM(8)], simde_float32x2x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_f32_x4(ptr, val); - #else - simde_vst1_f32(ptr, val.val[0]); - simde_vst1_f32(ptr+2, val.val[1]); - simde_vst1_f32(ptr+4, val.val[2]); - simde_vst1_f32(ptr+6, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_f32_x4 - #define vst1_f32_x4(ptr, val) simde_vst1_f32_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_f64_x4(simde_float64 ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x1x4_t val) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1_f64_x4(ptr, val); - #else - simde_vst1_f64(ptr, val.val[0]); - simde_vst1_f64(ptr+1, val.val[1]); - simde_vst1_f64(ptr+2, val.val[2]); - simde_vst1_f64(ptr+3, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst1_f64_x4 - #define vst1_f64_x4(ptr, val) simde_vst1_f64_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s8_x4(int8_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_int8x8x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_s8_x4(ptr, val); - #else - simde_vst1_s8(ptr, val.val[0]); - simde_vst1_s8(ptr+8, val.val[1]); - simde_vst1_s8(ptr+16, val.val[2]); - simde_vst1_s8(ptr+24, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s8_x4 - #define vst1_s8_x4(ptr, val) simde_vst1_s8_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s16_x4(int16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_int16x4x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_s16_x4(ptr, val); - #else - simde_vst1_s16(ptr, val.val[0]); - simde_vst1_s16(ptr+4, val.val[1]); - simde_vst1_s16(ptr+8, val.val[2]); - simde_vst1_s16(ptr+12, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s16_x4 - #define vst1_s16_x4(ptr, val) simde_vst1_s16_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s32_x4(int32_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_int32x2x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_s32_x4(ptr, val); - #else - simde_vst1_s32(ptr, val.val[0]); - simde_vst1_s32(ptr+2, val.val[1]); - simde_vst1_s32(ptr+4, val.val[2]); - simde_vst1_s32(ptr+6, val.val[3]); - #endif -} -#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s32_x4 - #define vst1_s32_x4(ptr, val) simde_vst1_s32_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_s64_x4(int64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x1x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_s64_x4(ptr, val); - #else - simde_vst1_s64(ptr, val.val[0]); - simde_vst1_s64(ptr+1, val.val[1]); - simde_vst1_s64(ptr+2, val.val[2]); - simde_vst1_s64(ptr+3, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_s64_x4 - #define vst1_s64_x4(ptr, val) simde_vst1_s64_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u8_x4(uint8_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_uint8x8x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_u8_x4(ptr, val); - #else - simde_vst1_u8(ptr, val.val[0]); - simde_vst1_u8(ptr+8, val.val[1]); - simde_vst1_u8(ptr+16, val.val[2]); - simde_vst1_u8(ptr+24, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u8_x4 - #define vst1_u8_x4(ptr, val) simde_vst1_u8_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u16_x4(uint16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_uint16x4x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_u16_x4(ptr, val); - #else - simde_vst1_u16(ptr, val.val[0]); - simde_vst1_u16(ptr+4, val.val[1]); - simde_vst1_u16(ptr+8, val.val[2]); - simde_vst1_u16(ptr+12, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u16_x4 - #define vst1_u16_x4(ptr, val) simde_vst1_u16_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u32_x4(uint32_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_uint32x2x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_u32_x4(ptr, val); - #else - simde_vst1_u32(ptr, val.val[0]); - simde_vst1_u32(ptr+2, val.val[1]); - simde_vst1_u32(ptr+4, val.val[2]); - simde_vst1_u32(ptr+6, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u32_x4 - #define vst1_u32_x4(ptr, val) simde_vst1_u32_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_u64_x4(uint64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x1x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1_u64_x4(ptr, val); - #else - simde_vst1_u64(ptr, val.val[0]); - simde_vst1_u64(ptr+1, val.val[1]); - simde_vst1_u64(ptr+2, val.val[2]); - simde_vst1_u64(ptr+3, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_u64_x4 - #define vst1_u64_x4(ptr, val) simde_vst1_u64_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_p8_x4(simde_poly8_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_poly8x8x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1_p8_x4(ptr, val); - #else - simde_poly8x8_private val_[4]; - for (size_t i = 0; i < 4; i++) { - val_[i] = simde_poly8x8_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_p8_x4 - #define vst1_p8_x4(a, b) simde_vst1_p8_x4((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_p16_x4(simde_poly16_t 
ptr[HEDLEY_ARRAY_PARAM(16)], simde_poly16x4x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1_p16_x4(ptr, val); - #else - simde_poly16x4_private val_[4]; - for (size_t i = 0; i < 4; i++) { - val_[i] = simde_poly16x4_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1_p16_x4 - #define vst1_p16_x4(a, b) simde_vst1_p16_x4((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_p64_x4(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_poly64x1x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1_p64_x4(ptr, val); - #else - simde_poly64x1_private val_[4]; - for (size_t i = 0; i < 4; i++) { - val_[i] = simde_poly64x1_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1_p64_x4 - #define vst1_p64_x4(a, b) simde_vst1_p64_x4((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1_bf16_x4(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_bfloat16x4x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - vst1_bf16_x4(ptr, val); - #else - simde_bfloat16x4_private val_[4]; - for (size_t i = 0; i < 4; i++) { - val_[i] = simde_bfloat16x4_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1_bf16_x4 - #define vst1_bf16_x4(a, b) simde_vst1_bf16_x4((a), (b)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ST1_X4_H) */ -/* :: End simde/arm/neon/st1_x4.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/st1q_x2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ST1Q_X2_H) -#define SIMDE_ARM_NEON_ST1Q_X2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_f16_x2(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_float16x8x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - vst1q_f16_x2(ptr, val); - #else - simde_float16x8_private val_[2]; - for (size_t i = 0; i < 2; i++) { - val_[i] = simde_float16x8_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_f16_x2 - #define vst1q_f16_x2(a, b) simde_vst1q_f16_x2((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_f32_x2(simde_float32 ptr[HEDLEY_ARRAY_PARAM(8)], simde_float32x4x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_f32_x2(ptr, val); - #else - simde_vst1q_f32(ptr, val.val[0]); - simde_vst1q_f32(ptr+4, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_f32_x2 - #define vst1q_f32_x2(ptr, val) simde_vst1q_f32_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_f64_x2(simde_float64 ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x2x2_t val) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64_x2(ptr, val); - #else - simde_vst1q_f64(ptr, val.val[0]); - simde_vst1q_f64(ptr+2, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_f64_x2 - #define vst1q_f64_x2(ptr, val) simde_vst1q_f64_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s8_x2(int8_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_int8x16x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_s8_x2(ptr, val); - #else - simde_vst1q_s8(ptr, val.val[0]); - simde_vst1q_s8(ptr+16, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s8_x2 - #define vst1q_s8_x2(ptr, val) simde_vst1q_s8_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s16_x2(int16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_int16x8x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_s16_x2(ptr, val); - #else - simde_vst1q_s16(ptr, val.val[0]); - simde_vst1q_s16(ptr+8, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s16_x2 - #define vst1q_s16_x2(ptr, val) simde_vst1q_s16_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s32_x2(int32_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_int32x4x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_s32_x2(ptr, val); - #else - simde_vst1q_s32(ptr, val.val[0]); - simde_vst1q_s32(ptr+4, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s32_x2 - #define vst1q_s32_x2(ptr, val) simde_vst1q_s32_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s64_x2(int64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x2x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_s64_x2(ptr, val); - #else - simde_vst1q_s64(ptr, val.val[0]); - 
simde_vst1q_s64(ptr+2, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s64_x2 - #define vst1q_s64_x2(ptr, val) simde_vst1q_s64_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u8_x2(uint8_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_uint8x16x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_u8_x2(ptr, val); - #else - simde_vst1q_u8(ptr, val.val[0]); - simde_vst1q_u8(ptr+16, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u8_x2 - #define vst1q_u8_x2(ptr, val) simde_vst1q_u8_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u16_x2(uint16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_uint16x8x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_u16_x2(ptr, val); - #else - simde_vst1q_u16(ptr, val.val[0]); - simde_vst1q_u16(ptr+8, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u16_x2 - #define vst1q_u16_x2(ptr, val) simde_vst1q_u16_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u32_x2(uint32_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_uint32x4x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_u32_x2(ptr, val); - #else - simde_vst1q_u32(ptr, val.val[0]); - simde_vst1q_u32(ptr+4, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u32_x2 - #define vst1q_u32_x2(ptr, val) simde_vst1q_u32_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u64_x2(uint64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x2x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_u64_x2(ptr, val); - #else - simde_vst1q_u64(ptr, val.val[0]); - simde_vst1q_u64(ptr+2, val.val[1]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u64_x2 - #define vst1q_u64_x2(ptr, val) simde_vst1q_u64_x2((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_p8_x2(simde_poly8_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_poly8x16x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1q_p8_x2(ptr, val); - #else - simde_poly8x16_private val_[2]; - for (size_t i = 0; i < 2; i++) { - val_[i] = simde_poly8x16_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_p8_x2 - #define vst1q_p8_x2(a, b) simde_vst1q_p8_x2((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_p16_x2(simde_poly16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_poly16x8x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1q_p16_x2(ptr, val); - #else - simde_poly16x8_private val_[2]; - for (size_t i = 0; i < 2; i++) { - val_[i] = simde_poly16x8_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_p16_x2 - #define vst1q_p16_x2(a, b) simde_vst1q_p16_x2((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_p64_x2(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_poly64x2x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - 
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1q_p64_x2(ptr, val); - #else - simde_poly64x2_private val_[2]; - for (size_t i = 0; i < 2; i++) { - val_[i] = simde_poly64x2_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_p64_x2 - #define vst1q_p64_x2(a, b) simde_vst1q_p64_x2((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_bf16_x2(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_bfloat16x8x2_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - vst1q_bf16_x2(ptr, val); - #else - simde_bfloat16x8_private val_[2]; - for (size_t i = 0; i < 2; i++) { - val_[i] = simde_bfloat16x8_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_bf16_x2 - #define vst1q_bf16_x2(a, b) simde_vst1q_bf16_x2((a), (b)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ST1Q_X2_H) */ -/* :: End simde/arm/neon/st1q_x2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/st1q_x3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ST1Q_X3_H) -#define SIMDE_ARM_NEON_ST1Q_X3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_f16_x3(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_float16x8x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - vst1q_f16_x3(ptr, val); - #else - simde_float16x8_private val_[3]; - for (size_t i = 0; i < 3; i++) { - val_[i] = simde_float16x8_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_f16_x3 - #define vst1q_f16_x3(a, b) simde_vst1q_f16_x3((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_f32_x3(simde_float32 ptr[HEDLEY_ARRAY_PARAM(12)], simde_float32x4x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_f32_x3(ptr, val); - #else - simde_vst1q_f32(ptr, val.val[0]); - simde_vst1q_f32(ptr+4, val.val[1]); - simde_vst1q_f32(ptr+8, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_f32_x3 - #define vst1q_f32_x3(ptr, val) simde_vst1q_f32_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_f64_x3(simde_float64 ptr[HEDLEY_ARRAY_PARAM(6)], simde_float64x2x3_t val) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64_x3(ptr, val); - #else - simde_vst1q_f64(ptr, val.val[0]); - simde_vst1q_f64(ptr+2, val.val[1]); - simde_vst1q_f64(ptr+4, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_f64_x3 - #define vst1q_f64_x3(ptr, val) simde_vst1q_f64_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s8_x3(int8_t ptr[HEDLEY_ARRAY_PARAM(48)], simde_int8x16x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_s8_x3(ptr, val); - #else - simde_vst1q_s8(ptr, val.val[0]); - simde_vst1q_s8(ptr+16, val.val[1]); - simde_vst1q_s8(ptr+32, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s8_x3 - #define vst1q_s8_x3(ptr, val) simde_vst1q_s8_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s16_x3(int16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_int16x8x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_s16_x3(ptr, val); - #else - simde_vst1q_s16(ptr, val.val[0]); - simde_vst1q_s16(ptr+8, val.val[1]); - simde_vst1q_s16(ptr+16, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s16_x3 - #define vst1q_s16_x3(ptr, val) simde_vst1q_s16_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s32_x3(int32_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_int32x4x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_s32_x3(ptr, val); - #else - simde_vst1q_s32(ptr, val.val[0]); - simde_vst1q_s32(ptr+4, val.val[1]); - simde_vst1q_s32(ptr+8, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s32_x3 - #define vst1q_s32_x3(ptr, val) simde_vst1q_s32_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s64_x3(int64_t ptr[HEDLEY_ARRAY_PARAM(6)], 
simde_int64x2x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_s64_x3(ptr, val); - #else - simde_vst1q_s64(ptr, val.val[0]); - simde_vst1q_s64(ptr+2, val.val[1]); - simde_vst1q_s64(ptr+4, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s64_x3 - #define vst1q_s64_x3(ptr, val) simde_vst1q_s64_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u8_x3(uint8_t ptr[HEDLEY_ARRAY_PARAM(48)], simde_uint8x16x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_u8_x3(ptr, val); - #else - simde_vst1q_u8(ptr, val.val[0]); - simde_vst1q_u8(ptr+16, val.val[1]); - simde_vst1q_u8(ptr+32, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u8_x3 - #define vst1q_u8_x3(ptr, val) simde_vst1q_u8_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u16_x3(uint16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_uint16x8x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_u16_x3(ptr, val); - #else - simde_vst1q_u16(ptr, val.val[0]); - simde_vst1q_u16(ptr+8, val.val[1]); - simde_vst1q_u16(ptr+16, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u16_x3 - #define vst1q_u16_x3(ptr, val) simde_vst1q_u16_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u32_x3(uint32_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_uint32x4x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_u32_x3(ptr, val); - #else - simde_vst1q_u32(ptr, val.val[0]); - simde_vst1q_u32(ptr+4, val.val[1]); - simde_vst1q_u32(ptr+8, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u32_x3 - #define vst1q_u32_x3(ptr, val) simde_vst1q_u32_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u64_x3(uint64_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_uint64x2x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_u64_x3(ptr, val); - #else - simde_vst1q_u64(ptr, val.val[0]); - simde_vst1q_u64(ptr+2, val.val[1]); - simde_vst1q_u64(ptr+4, val.val[2]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u64_x3 - #define vst1q_u64_x3(ptr, val) simde_vst1q_u64_x3((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_p8_x3(simde_poly8_t ptr[HEDLEY_ARRAY_PARAM(48)], simde_poly8x16x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1q_p8_x3(ptr, val); - #else - simde_poly8x16_private val_[3]; - for (size_t i = 0; i < 3; i++) { - val_[i] = simde_poly8x16_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_p8_x3 - #define vst1q_p8_x3(a, b) simde_vst1q_p8_x3((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_p16_x3(simde_poly16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_poly16x8x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1q_p16_x3(ptr, val); - #else - simde_poly16x8_private val_[3]; - for (size_t i = 0; i < 3; i++) { - val_[i] = simde_poly16x8_to_private(val.val[i]); - } - simde_memcpy(ptr, 
&val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_p16_x3 - #define vst1q_p16_x3(a, b) simde_vst1q_p16_x3((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_p64_x3(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_poly64x2x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1q_p64_x3(ptr, val); - #else - simde_poly64x2_private val_[3]; - for (size_t i = 0; i < 3; i++) { - val_[i] = simde_poly64x2_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_p64_x3 - #define vst1q_p64_x3(a, b) simde_vst1q_p64_x3((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_bf16_x3(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_bfloat16x8x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - vst1q_bf16_x3(ptr, val); - #else - simde_bfloat16x8_private val_[3]; - for (size_t i = 0; i < 3; i++) { - val_[i] = simde_bfloat16x8_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_bf16_x3 - #define vst1q_bf16_x3(a, b) simde_vst1q_bf16_x3((a), (b)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ST1Q_X3_H) */ -/* :: End simde/arm/neon/st1q_x3.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/st1q_x4.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2021 Décio Luiz Gazzoni Filho - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ST1Q_X4_H) -#define SIMDE_ARM_NEON_ST1Q_X4_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_f16_x4(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_float16x8x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - vst1q_f16_x4(ptr, val); - #else - simde_float16x8_private val_[4]; - for (size_t i = 0; i < 4; i++) { - val_[i] = simde_float16x8_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_f16_x4 - #define vst1q_f16_x4(a, b) simde_vst1q_f16_x4((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_f32_x4(simde_float32 ptr[HEDLEY_ARRAY_PARAM(16)], simde_float32x4x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_f32_x4(ptr, val); - #else - simde_vst1q_f32(ptr, val.val[0]); - simde_vst1q_f32(ptr+4, val.val[1]); - simde_vst1q_f32(ptr+8, val.val[2]); - simde_vst1q_f32(ptr+12, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_f32_x4 - #define vst1q_f32_x4(ptr, val) simde_vst1q_f32_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_f64_x4(simde_float64 ptr[HEDLEY_ARRAY_PARAM(8)], simde_float64x2x4_t val) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64_x4(ptr, val); - #else - simde_vst1q_f64(ptr, val.val[0]); - simde_vst1q_f64(ptr+2, val.val[1]); - simde_vst1q_f64(ptr+4, val.val[2]); - simde_vst1q_f64(ptr+6, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_f64_x4 - #define vst1q_f64_x4(ptr, val) simde_vst1q_f64_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s8_x4(int8_t ptr[HEDLEY_ARRAY_PARAM(64)], simde_int8x16x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_s8_x4(ptr, val); - #else - simde_vst1q_s8(ptr, val.val[0]); - simde_vst1q_s8(ptr+16, val.val[1]); - simde_vst1q_s8(ptr+32, val.val[2]); - simde_vst1q_s8(ptr+48, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s8_x4 - #define vst1q_s8_x4(ptr, val) simde_vst1q_s8_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s16_x4(int16_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_int16x8x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_s16_x4(ptr, val); - #else - simde_vst1q_s16(ptr, val.val[0]); - simde_vst1q_s16(ptr+8, val.val[1]); - simde_vst1q_s16(ptr+16, val.val[2]); - simde_vst1q_s16(ptr+24, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s16_x4 - #define vst1q_s16_x4(ptr, val) simde_vst1q_s16_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s32_x4(int32_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_int32x4x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_s32_x4(ptr, val); - #else - simde_vst1q_s32(ptr, val.val[0]); - simde_vst1q_s32(ptr+4, val.val[1]); - simde_vst1q_s32(ptr+8, val.val[2]); - simde_vst1q_s32(ptr+12, val.val[3]); - #endif 
-} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s32_x4 - #define vst1q_s32_x4(ptr, val) simde_vst1q_s32_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_s64_x4(int64_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_int64x2x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_s64_x4(ptr, val); - #else - simde_vst1q_s64(ptr, val.val[0]); - simde_vst1q_s64(ptr+2, val.val[1]); - simde_vst1q_s64(ptr+4, val.val[2]); - simde_vst1q_s64(ptr+6, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_s64_x4 - #define vst1q_s64_x4(ptr, val) simde_vst1q_s64_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u8_x4(uint8_t ptr[HEDLEY_ARRAY_PARAM(64)], simde_uint8x16x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_u8_x4(ptr, val); - #else - simde_vst1q_u8(ptr, val.val[0]); - simde_vst1q_u8(ptr+16, val.val[1]); - simde_vst1q_u8(ptr+32, val.val[2]); - simde_vst1q_u8(ptr+48, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u8_x4 - #define vst1q_u8_x4(ptr, val) simde_vst1q_u8_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u16_x4(uint16_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_uint16x8x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_u16_x4(ptr, val); - #else - simde_vst1q_u16(ptr, val.val[0]); - simde_vst1q_u16(ptr+8, val.val[1]); - simde_vst1q_u16(ptr+16, val.val[2]); - simde_vst1q_u16(ptr+24, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u16_x4 - #define vst1q_u16_x4(ptr, val) simde_vst1q_u16_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u32_x4(uint32_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_uint32x4x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_u32_x4(ptr, val); - #else - simde_vst1q_u32(ptr, val.val[0]); - simde_vst1q_u32(ptr+4, val.val[1]); - simde_vst1q_u32(ptr+8, val.val[2]); - simde_vst1q_u32(ptr+12, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u32_x4 - #define vst1q_u32_x4(ptr, val) simde_vst1q_u32_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_u64_x4(uint64_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_uint64x2x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) - vst1q_u64_x4(ptr, val); - #else - simde_vst1q_u64(ptr, val.val[0]); - simde_vst1q_u64(ptr+2, val.val[1]); - simde_vst1q_u64(ptr+4, val.val[2]); - simde_vst1q_u64(ptr+6, val.val[3]); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_u64_x4 - #define vst1q_u64_x4(ptr, val) simde_vst1q_u64_x4((ptr), (val)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_p8_x4(simde_poly8_t ptr[HEDLEY_ARRAY_PARAM(64)], simde_poly8x16x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1q_p8_x4(ptr, val); - #else - simde_poly8x16_private val_[4]; - for (size_t i = 0; i < 4; i++) { - val_[i] = simde_poly8x16_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_p8_x4 - #define vst1q_p8_x4(a, b) simde_vst1q_p8_x4((a), (b)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_p16_x4(simde_poly16_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_poly16x8x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1q_p16_x4(ptr, val); - #else - simde_poly16x8_private val_[4]; - for (size_t i = 0; i < 4; i++) { - val_[i] = simde_poly16x8_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst1q_p16_x4 - #define vst1q_p16_x4(a, b) simde_vst1q_p16_x4((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_p64_x4(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_poly64x2x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) - vst1q_p64_x4(ptr, val); - #else - simde_poly64x2_private val_[4]; - for (size_t i = 0; i < 4; i++) { - val_[i] = simde_poly64x2_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_p64_x4 - #define vst1q_p64_x4(a, b) simde_vst1q_p64_x4((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst1q_bf16_x4(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_bfloat16x8x4_t val) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - vst1q_bf16_x4(ptr, val); - #else - simde_bfloat16x8_private val_[4]; - for (size_t i = 0; i < 4; i++) { - val_[i] = simde_bfloat16x8_to_private(val.val[i]); - } - simde_memcpy(ptr, &val_, sizeof(val_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst1q_bf16_x4 - #define vst1q_bf16_x4(a, b) simde_vst1q_bf16_x4((a), (b)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ST1Q_X4_H) */ -/* :: End simde/arm/neon/st1q_x4.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/st2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ST2_H) -#define SIMDE_ARM_NEON_ST2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/zip.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ZIP_H) && !defined(SIMDE_BUG_INTEL_857088) -#define SIMDE_ARM_NEON_ZIP_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/zip1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ZIP1_H) -#define SIMDE_ARM_NEON_ZIP1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vzip1_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vzip1_f16(a, b); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - r_.values[2 * i ] = a_.values[i]; - r_.values[2 * i + 1] = b_.values[i]; - } - - return simde_float16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vzip1_f16 - #define vzip1_f16(a, b) simde_vzip1_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vzip1_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vzip1_f32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2x2_t tmp = vzip_f32(a, b); - return tmp.val[0]; - #else - simde_float32x2_private - r_, - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_unpacklo_pi32(a_.m64, b_.m64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - r_.values[2 * i ] = a_.values[i]; - r_.values[2 * i + 1] = b_.values[i]; - } - #endif - - return simde_float32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vzip1_f32 - #define vzip1_f32(a, b) simde_vzip1_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vzip1_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vzip1_s8(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8x2_t tmp = vzip_s8(a, b); - return tmp.val[0]; - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a), - b_ = simde_int8x8_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_unpacklo_pi8(a_.m64, b_.m64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 0, 8, 1, 9, 2, 10, 3, 11); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - r_.values[2 * i ] = a_.values[i]; - r_.values[2 * i + 1] = b_.values[i]; - } - #endif - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vzip1_s8 - #define vzip1_s8(a, b) simde_vzip1_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vzip1_s16(simde_int16x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vzip1_s16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t tmp = vzip_s16(a, b); - return tmp.val[0]; - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a), - b_ = 
simde_int16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_unpacklo_pi16(a_.m64, b_.m64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 0, 4, 1, 5); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - r_.values[2 * i ] = a_.values[i]; - r_.values[2 * i + 1] = b_.values[i]; - } - #endif - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vzip1_s16 - #define vzip1_s16(a, b) simde_vzip1_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vzip1_s32(simde_int32x2_t a, simde_int32x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vzip1_s32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2x2_t tmp = vzip_s32(a, b); - return tmp.val[0]; - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a), - b_ = simde_int32x2_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_unpacklo_pi32(a_.m64, b_.m64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - r_.values[2 * i ] = a_.values[i]; - r_.values[2 * i + 1] = b_.values[i]; - } - #endif - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vzip1_s32 - #define vzip1_s32(a, b) simde_vzip1_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vzip1_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vzip1_u8(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8x2_t tmp = vzip_u8(a, b); - return tmp.val[0]; - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_unpacklo_pi8(a_.m64, b_.m64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 0, 8, 1, 9, 2, 10, 3, 11); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - r_.values[2 * i ] = a_.values[i]; - r_.values[2 * i + 1] = b_.values[i]; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vzip1_u8 - #define vzip1_u8(a, b) simde_vzip1_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vzip1_u16(simde_uint16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vzip1_u16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4x2_t tmp = vzip_u16(a, b); - return tmp.val[0]; - #else - simde_uint16x4_private - r_, - a_ = simde_uint16x4_to_private(a), - b_ = simde_uint16x4_to_private(b); - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_unpacklo_pi16(a_.m64, b_.m64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 0, 4, 1, 5); - #else - const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway_point ; i++) { - r_.values[2 * i ] = a_.values[i]; - r_.values[2 * i + 1] = b_.values[i]; - } - #endif - - return 
[The remaining hunks delete the vendored SIMDe NEON polyfill headers in full:
 the rest of simde/arm/neon/zip1.h (the simde_vzip1* / simde_vzip1q_* shims),
 all of simde/arm/neon/zip2.h including its MIT license header (the
 simde_vzip2* / simde_vzip2q_* shims), the combined simde_vzip* / simde_vzipq_*
 wrappers from simde/arm/neon/zip.h, and the simde_vst2* / simde_vst2q_*
 interleaved-store shims that follow. These shims emulate the AArch64
 zip/store intrinsics on non-NEON targets via SSE, WASM, AltiVec, or scalar
 fallbacks: vzip1 interleaves the low halves of two vectors, vzip2 the high
 halves, and vst2 writes two vectors to memory in interleaved order. Every
 removed line in these hunks is third-party SIMDe library code; no
 project-authored sources are touched here.]
simde_bfloat16x8_to_private(val.val[1])}; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { - buf[i] = a_[i % 2].values[i / 2]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst2q_bf16 - #define vst2q_bf16(a, b) simde_vst2q_bf16((a), (b)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ST2_H) */ -/* :: End simde/arm/neon/st2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/st2_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2021 Zhi An Ng (Copyright owned by Google, LLC) - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ST2_LANE_H) -#define SIMDE_ARM_NEON_ST2_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int8x8x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_NO_RESULT_(vst2_lane_s8, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int8x8_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_int8x8_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_s8 - #define vst2_lane_s8(a, b, c) simde_vst2_lane_s8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int16x4x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_NO_RESULT_(vst2_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int16x4_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_int16x4_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_s16 - #define vst2_lane_s16(a, b, c) simde_vst2_lane_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int32x2x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst2_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int32x2_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_int32x2_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_s32 - #define vst2_lane_s32(a, b, c) simde_vst2_lane_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int64x1x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - HEDLEY_STATIC_CAST(void, lane); - vst2_lane_s64(ptr, val, 0); - #else - simde_int64x1_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_int64x1_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_s64 - #define vst2_lane_s64(a, b, c) simde_vst2_lane_s64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint8x8x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_NO_RESULT_(vst2_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint8x8_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_uint8x8_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_u8 - #define vst2_lane_u8(a, b, c) simde_vst2_lane_u8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void 
-simde_vst2_lane_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint16x4x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_NO_RESULT_(vst2_lane_u16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint16x4_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_uint16x4_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_u16 - #define vst2_lane_u16(a, b, c) simde_vst2_lane_u16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint32x2x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst2_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint32x2_private r; - for (size_t i = 0 ; i < 2 ; i ++) { - r = simde_uint32x2_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_u32 - #define vst2_lane_u32(a, b, c) simde_vst2_lane_u32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint64x1x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - HEDLEY_STATIC_CAST(void, lane); - vst2_lane_u64(ptr, val, 0); - #else - simde_uint64x1_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_uint64x1_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_u64 - #define vst2_lane_u64(a, b, c) simde_vst2_lane_u64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float16x4x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - SIMDE_CONSTIFY_4_NO_RESULT_(vst2_lane_f16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_float16x4_private r; - for (size_t i = 0 ; i < 2 ; i ++) { - r = simde_float16x4_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_f16 - #define vst2_lane_f16(a, b, c) simde_vst2_lane_f16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float32x2x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst2_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_float32x2_private r; - for (size_t i = 0 ; i < 2 ; i ++) { - r = simde_float32x2_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_f32 - #define vst2_lane_f32(a, b, c) simde_vst2_lane_f32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float64x1x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - HEDLEY_STATIC_CAST(void, lane); - vst2_lane_f64(ptr, val, 0); - #else - simde_float64x1_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_float64x1_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } 
- #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_f64 - #define vst2_lane_f64(a, b, c) simde_vst2_lane_f64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int8x16x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 16) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_CONSTIFY_16_NO_RESULT_(vst2q_lane_s8, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int8x16_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_int8x16_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_s8 - #define vst2q_lane_s8(a, b, c) simde_vst2q_lane_s8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int16x8x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_NO_RESULT_(vst2q_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int16x8_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_int16x8_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_s16 - #define vst2q_lane_s16(a, b, c) simde_vst2q_lane_s16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int32x4x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_NO_RESULT_(vst2q_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int32x4_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_int32x4_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_s32 - #define vst2q_lane_s32(a, b, c) simde_vst2q_lane_s32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int64x2x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst2q_lane_s64, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_int64x2_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_int64x2_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_s64 - #define vst2q_lane_s64(a, b, c) simde_vst2q_lane_s64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint8x16x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 16) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_CONSTIFY_16_NO_RESULT_(vst2q_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint8x16_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_uint8x16_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_u8 - #define vst2q_lane_u8(a, b, c) simde_vst2q_lane_u8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint16x8x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_NO_RESULT_(vst2q_lane_u16, 
HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint16x8_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_uint16x8_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_u16 - #define vst2q_lane_u16(a, b, c) simde_vst2q_lane_u16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint32x4x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_NO_RESULT_(vst2q_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint32x4_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_uint32x4_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_u32 - #define vst2q_lane_u32(a, b, c) simde_vst2q_lane_u32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint64x2x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst2q_lane_u64, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_uint64x2_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_uint64x2_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_u64 - #define vst2q_lane_u64(a, b, c) simde_vst2q_lane_u64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float16x8x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - SIMDE_CONSTIFY_8_NO_RESULT_(vst2q_lane_f16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_float16x8_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_float16x8_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_f16 - #define vst2q_lane_f16(a, b, c) simde_vst2q_lane_f16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float32x4x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_NO_RESULT_(vst2q_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_float32x4_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_float32x4_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_f32 - #define vst2q_lane_f32(a, b, c) simde_vst2q_lane_f32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float64x2x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst2q_lane_f64, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_float64x2_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_float64x2_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_f64 - #define vst2q_lane_f64(a, b, c) simde_vst2q_lane_f64((a), (b), (c)) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_p8(simde_poly8_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_poly8x8x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_NO_RESULT_(vst2_lane_p8, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_poly8x8_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_poly8x8_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_p8 - #define vst2_lane_p8(a, b, c) simde_vst2_lane_p8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_p16(simde_poly16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_poly16x4x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_4_NO_RESULT_(vst2_lane_p16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_poly16x4_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_poly16x4_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_p16 - #define vst2_lane_p16(a, b, c) simde_vst2_lane_p16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_p64(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_poly64x1x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - HEDLEY_STATIC_CAST(void, lane); - vst2_lane_p64(ptr, val, 0); - #else - simde_poly64x1_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_poly64x1_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_p64 - #define vst2_lane_p64(a, b, c) simde_vst2_lane_p64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_p8(simde_poly8_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_poly8x16x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 16) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_CONSTIFY_16_NO_RESULT_(vst2q_lane_p8, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_poly8x16_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_poly8x16_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_p8 - #define vst2q_lane_p8(a, b, c) simde_vst2q_lane_p8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_p16(simde_poly16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_poly16x8x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_NO_RESULT_(vst2q_lane_p16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_poly16x8_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_poly16x8_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_p16 - #define vst2q_lane_p16(a, b, c) simde_vst2q_lane_p16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_p64(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_poly64x2x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst2q_lane_p64, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_poly64x2_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = 
simde_poly64x2_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_p64 - #define vst2q_lane_p64(a, b, c) simde_vst2q_lane_p64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2_lane_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_bfloat16x4x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - SIMDE_CONSTIFY_4_NO_RESULT_(vst2_lane_bf16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_bfloat16x4_private r; - for (size_t i = 0 ; i < 2 ; i ++) { - r = simde_bfloat16x4_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst2_lane_bf16 - #define vst2_lane_bf16(a, b, c) simde_vst2_lane_bf16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst2q_lane_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_bfloat16x8x2_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - SIMDE_CONSTIFY_8_NO_RESULT_(vst2q_lane_bf16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_bfloat16x8_private r; - for (size_t i = 0 ; i < 2 ; i++) { - r = simde_bfloat16x8_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst2q_lane_bf16 - #define vst2q_lane_bf16(a, b, c) simde_vst2q_lane_bf16((a), (b), (c)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ST2_LANE_H) */ -/* :: End simde/arm/neon/st2_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/st3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Sean Maher - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_ST3_H) -#define SIMDE_ARM_NEON_ST3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_float16x4x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - vst3_f16(ptr, val); - #else - simde_float16x4_private a[3] = { simde_float16x4_to_private(val.val[0]), - simde_float16x4_to_private(val.val[1]), - simde_float16x4_to_private(val.val[2]) }; - simde_float16_t buf[12]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3_f16 - #define vst3_f16(a, b) simde_vst3_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_float32x2x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3_f32(ptr, val); - #else - simde_float32x2_private a[3] = { simde_float32x2_to_private(val.val[0]), - simde_float32x2_to_private(val.val[1]), - simde_float32x2_to_private(val.val[2]) }; - #if defined(SIMDE_SHUFFLE_VECTOR_) - __typeof__(a[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[0].values, a[1].values, 0, 2); - __typeof__(a[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[2].values, a[0].values, 0, 3); - __typeof__(a[0].values) r3 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[1].values, a[2].values, 1, 3); - simde_memcpy(ptr, &r1, sizeof(r1)); - simde_memcpy(&ptr[2], &r2, sizeof(r2)); - simde_memcpy(&ptr[4], &r3, sizeof(r3)); - #else - simde_float32_t buf[6]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3_f32 - #define vst3_f32(a, b) simde_vst3_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_float64x1x3_t val) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst3_f64(ptr, val); - #else - simde_float64x1_private a_[3] = { simde_float64x1_to_private(val.val[0]), - simde_float64x1_to_private(val.val[1]), - simde_float64x1_to_private(val.val[2]) }; - simde_memcpy(ptr, &a_[0].values, sizeof(a_[0].values)); - simde_memcpy(&ptr[1], &a_[1].values, sizeof(a_[1].values)); - simde_memcpy(&ptr[2], &a_[2].values, sizeof(a_[2].values)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst3_f64 - #define vst3_f64(a, b) simde_vst3_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_int8x8x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3_s8(ptr, val); - #else - simde_int8x8_private a_[3] = { simde_int8x8_to_private(val.val[0]), - simde_int8x8_to_private(val.val[1]), - simde_int8x8_to_private(val.val[2]) }; - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) - __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_[0].values, a_[1].values, - 0, 8, 3, 1, 
9, 4, 2, 10); - __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(8, 8, r0, a_[2].values, - 0, 1, 8, 3, 4, 9, 6, 7); - simde_memcpy(ptr, &m0, sizeof(m0)); - - __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_[2].values, a_[1].values, - 2, 5, 11, 3, 6, 12, 4, 7); - __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(8, 8, r1, a_[0].values, - 0, 11, 2, 3, 12, 5, 6, 13); - simde_memcpy(&ptr[8], &m1, sizeof(m1)); - - __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_[0].values, a_[2].values, - 13, 6, 0, 14, 7, 0, 15, 0); - __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(8, 8, r2, a_[1].values, - 13, 0, 1, 14, 3, 4, 15, 6); - simde_memcpy(&ptr[16], &m2, sizeof(m2)); - #else - int8_t buf[24]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a_[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3_s8 - #define vst3_s8(a, b) simde_vst3_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_int16x4x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3_s16(ptr, val); - #else - simde_int16x4_private a_[3] = { simde_int16x4_to_private(val.val[0]), - simde_int16x4_to_private(val.val[1]), - simde_int16x4_to_private(val.val[2]) }; - #if defined(SIMDE_SHUFFLE_VECTOR_) - __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_[0].values, a_[1].values, - 0, 4, 1, 0); - __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(16, 8, r0, a_[2].values, - 0, 1, 4, 2); - simde_memcpy(ptr, &m0, sizeof(m0)); - - __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_[1].values, a_[2].values, - 1, 5, 2, 0); - __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(16, 8, r1, a_[0].values, - 0, 1, 6, 2); - simde_memcpy(&ptr[4], &m1, sizeof(m1)); - - __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_[2].values, a_[0].values, - 2, 7, 3, 0); - __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(16, 8, r2, a_[1].values, - 0, 1, 7, 2); - simde_memcpy(&ptr[8], &m2, sizeof(m2)); - #else - int16_t buf[12]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a_[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3_s16 - #define vst3_s16(a, b) simde_vst3_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_int32x2x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3_s32(ptr, val); - #else - simde_int32x2_private a[3] = { simde_int32x2_to_private(val.val[0]), - simde_int32x2_to_private(val.val[1]), - simde_int32x2_to_private(val.val[2]) }; - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) - __typeof__(a[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[0].values, a[1].values, 0, 2); - __typeof__(a[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[2].values, a[0].values, 0, 3); - __typeof__(a[0].values) r3 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[1].values, a[2].values, 1, 3); - simde_memcpy(ptr, &r1, sizeof(r1)); - simde_memcpy(&ptr[2], &r2, sizeof(r2)); - simde_memcpy(&ptr[4], &r3, sizeof(r3)); - #else - int32_t buf[6]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3_s32 - #define 
vst3_s32(a, b) simde_vst3_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_int64x1x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3_s64(ptr, val); - #else - simde_int64x1_private a_[3] = { simde_int64x1_to_private(val.val[0]), - simde_int64x1_to_private(val.val[1]), - simde_int64x1_to_private(val.val[2]) }; - simde_memcpy(ptr, &a_[0].values, sizeof(a_[0].values)); - simde_memcpy(&ptr[1], &a_[1].values, sizeof(a_[1].values)); - simde_memcpy(&ptr[2], &a_[2].values, sizeof(a_[2].values)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst3_s64 - #define vst3_s64(a, b) simde_vst3_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_uint8x8x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3_u8(ptr, val); - #else - simde_uint8x8_private a_[3] = { simde_uint8x8_to_private(val.val[0]), - simde_uint8x8_to_private(val.val[1]), - simde_uint8x8_to_private(val.val[2]) }; - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) - __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_[0].values, a_[1].values, - 0, 8, 3, 1, 9, 4, 2, 10); - __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(8, 8, r0, a_[2].values, - 0, 1, 8, 3, 4, 9, 6, 7); - simde_memcpy(ptr, &m0, sizeof(m0)); - - __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_[2].values, a_[1].values, - 2, 5, 11, 3, 6, 12, 4, 7); - __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(8, 8, r1, a_[0].values, - 0, 11, 2, 3, 12, 5, 6, 13); - simde_memcpy(&ptr[8], &m1, sizeof(m1)); - - __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_[0].values, a_[2].values, - 13, 6, 0, 14, 7, 0, 15, 0); - __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(8, 8, r2, a_[1].values, - 13, 0, 1, 14, 3, 4, 15, 6); - simde_memcpy(&ptr[16], &m2, sizeof(m2)); - #else - uint8_t buf[24]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a_[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3_u8 - #define vst3_u8(a, b) simde_vst3_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_uint16x4x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3_u16(ptr, val); - #else - simde_uint16x4_private a_[3] = { simde_uint16x4_to_private(val.val[0]), - simde_uint16x4_to_private(val.val[1]), - simde_uint16x4_to_private(val.val[2]) }; - #if defined(SIMDE_SHUFFLE_VECTOR_) - __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_[0].values, a_[1].values, - 0, 4, 1, 0); - __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(16, 8, r0, a_[2].values, - 0, 1, 4, 2); - simde_memcpy(ptr, &m0, sizeof(m0)); - - __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_[1].values, a_[2].values, - 1, 5, 2, 0); - __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(16, 8, r1, a_[0].values, - 0, 1, 6, 2); - simde_memcpy(&ptr[4], &m1, sizeof(m1)); - - __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_[2].values, a_[0].values, - 2, 7, 3, 0); - __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(16, 8, r2, a_[1].values, - 0, 1, 7, 2); - simde_memcpy(&ptr[8], &m2, sizeof(m2)); - #else - uint16_t buf[12]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a_[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif 
-} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3_u16 - #define vst3_u16(a, b) simde_vst3_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_uint32x2x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3_u32(ptr, val); - #else - simde_uint32x2_private a[3] = { simde_uint32x2_to_private(val.val[0]), - simde_uint32x2_to_private(val.val[1]), - simde_uint32x2_to_private(val.val[2]) }; - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) - __typeof__(a[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[0].values, a[1].values, 0, 2); - __typeof__(a[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[2].values, a[0].values, 0, 3); - __typeof__(a[0].values) r3 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[1].values, a[2].values, 1, 3); - simde_memcpy(ptr, &r1, sizeof(r1)); - simde_memcpy(&ptr[2], &r2, sizeof(r2)); - simde_memcpy(&ptr[4], &r3, sizeof(r3)); - #else - uint32_t buf[6]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3_u32 - #define vst3_u32(a, b) simde_vst3_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint64x1x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3_u64(ptr, val); - #else - simde_uint64x1_private a_[3] = { simde_uint64x1_to_private(val.val[0]), - simde_uint64x1_to_private(val.val[1]), - simde_uint64x1_to_private(val.val[2]) }; - simde_memcpy(ptr, &a_[0].values, sizeof(a_[0].values)); - simde_memcpy(&ptr[1], &a_[1].values, sizeof(a_[1].values)); - simde_memcpy(&ptr[2], &a_[2].values, sizeof(a_[2].values)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst3_u64 - #define vst3_u64(a, b) simde_vst3_u64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3q_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_float16x8x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - vst3q_f16(ptr, val); - #else - simde_float16x8_private a_[3] = { simde_float16x8_to_private(val.val[0]), - simde_float16x8_to_private(val.val[1]), - simde_float16x8_to_private(val.val[2]) }; - simde_float16_t buf[24]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a_[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3q_f16 - #define vst3q_f16(a, b) simde_vst3q_f16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3q_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_float32x4x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3q_f32(ptr, val); - #else - simde_float32x4_private a_[3] = { simde_float32x4_to_private(val.val[0]), - simde_float32x4_to_private(val.val[1]), - simde_float32x4_to_private(val.val[2]) }; - #if defined(SIMDE_SHUFFLE_VECTOR_) - __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[0].values, a_[1].values, - 0, 4, 1, 0); - __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(32, 16, r0, a_[2].values, - 0, 1, 4, 2); - simde_memcpy(ptr, &m0, sizeof(m0)); - - __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[1].values, a_[2].values, - 1, 5, 2, 0); - __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(32, 16, r1, a_[0].values, - 0, 1, 6, 2); - simde_memcpy(&ptr[4], &m1, 
sizeof(m1)); - - __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[2].values, a_[0].values, - 2, 7, 3, 0); - __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(32, 16, r2, a_[1].values, - 0, 1, 7, 2); - simde_memcpy(&ptr[8], &m2, sizeof(m2)); - #else - simde_float32_t buf[12]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a_[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3q_f32 - #define vst3q_f32(a, b) simde_vst3q_f32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3q_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_float64x2x3_t val) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst3q_f64(ptr, val); - #else - simde_float64x2_private a[3] = { simde_float64x2_to_private(val.val[0]), - simde_float64x2_to_private(val.val[1]), - simde_float64x2_to_private(val.val[2]) }; - #if defined(SIMDE_SHUFFLE_VECTOR_) - __typeof__(a[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[0].values, a[1].values, 0, 2); - __typeof__(a[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[2].values, a[0].values, 0, 3); - __typeof__(a[0].values) r3 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[1].values, a[2].values, 1, 3); - simde_memcpy(ptr, &r1, sizeof(r1)); - simde_memcpy(&ptr[2], &r2, sizeof(r2)); - simde_memcpy(&ptr[4], &r3, sizeof(r3)); - #else - simde_float64_t buf[6]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst3q_f64 - #define vst3q_f64(a, b) simde_vst3q_f64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3q_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(48)], simde_int8x16x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3q_s8(ptr, val); - #else - simde_int8x16_private a_[3] = { simde_int8x16_to_private(val.val[0]), - simde_int8x16_to_private(val.val[1]), - simde_int8x16_to_private(val.val[2]) }; - #if defined(SIMDE_SHUFFLE_VECTOR_) - __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_[0].values, a_[1].values, - 0, 16, 6, 1, 17, 7, 2, 18, 8, 3, 19, 9, - 4, 20, 10, 5); - - __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(8, 16, r0, a_[2].values, - 0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, 19, 12, 13, 20, 15); - simde_memcpy(ptr, &m0, sizeof(m0)); - - __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_[1].values, a_[2].values, - 5, 21, 11, 6, 22, 12, 7, 23, 13, 8, 24, - 14, 9, 25, 15, 10); - - __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(8, 16, r1, r0, - 0, 1, 18, 3, 4, 21, 6, 7, 24, 9, 10, 27, 12, 13, 30, 15); - simde_memcpy(&ptr[16], &m1, sizeof(m1)); - - __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_[2].values, a_[0].values, - 10, 27, 0, 11, 28, 0, 12, 29, 0, 13, 30, 0, 14, 31, 0, 15); - - __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(8, 16, r2, r1, - 0, 1, 18, 3, 4, 21, 6, 7, 24, 9, 10, 27, 12, 13, 30, 15); - simde_memcpy(&ptr[32], &m2, sizeof(m2)); - #else - int8_t buf[48]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a_[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3q_s8 - #define vst3q_s8(a, b) simde_vst3q_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3q_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_int16x8x3_t val) { - 
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3q_s16(ptr, val); - #else - simde_int16x8_private a_[3] = { simde_int16x8_to_private(val.val[0]), - simde_int16x8_to_private(val.val[1]), - simde_int16x8_to_private(val.val[2]) }; - #if defined(SIMDE_SHUFFLE_VECTOR_) - __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_[0].values, a_[1].values, - 0, 8, 3, 1, 9, 4, 2, 10); - __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(16, 16, r0, a_[2].values, - 0, 1, 8, 3, 4, 9, 6, 7); - simde_memcpy(ptr, &m0, sizeof(m0)); - - __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_[2].values, a_[1].values, - 2, 5, 11, 3, 6, 12, 4, 7); - __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(16, 16, r1, a_[0].values, - 0, 11, 2, 3, 12, 5, 6, 13); - simde_memcpy(&ptr[8], &m1, sizeof(m1)); - - __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_[0].values, a_[2].values, - 13, 6, 0, 14, 7, 0, 15, 0); - __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(16, 16, r2, a_[1].values, - 13, 0, 1, 14, 3, 4, 15, 6); - simde_memcpy(&ptr[16], &m2, sizeof(m2)); - #else - int16_t buf[24]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a_[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3q_s16 - #define vst3q_s16(a, b) simde_vst3q_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3q_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_int32x4x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3q_s32(ptr, val); - #else - simde_int32x4_private a_[3] = { simde_int32x4_to_private(val.val[0]), - simde_int32x4_to_private(val.val[1]), - simde_int32x4_to_private(val.val[2]) }; - #if defined(SIMDE_SHUFFLE_VECTOR_) - __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[0].values, a_[1].values, - 0, 4, 1, 0); - __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(32, 16, r0, a_[2].values, - 0, 1, 4, 2); - simde_memcpy(ptr, &m0, sizeof(m0)); - - __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[1].values, a_[2].values, - 1, 5, 2, 0); - __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(32, 16, r1, a_[0].values, - 0, 1, 6, 2); - simde_memcpy(&ptr[4], &m1, sizeof(m1)); - - __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[2].values, a_[0].values, - 2, 7, 3, 0); - __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(32, 16, r2, a_[1].values, - 0, 1, 7, 2); - simde_memcpy(&ptr[8], &m2, sizeof(m2)); - #else - int32_t buf[12]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a_[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3q_s32 - #define vst3q_s32(a, b) simde_vst3q_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3q_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_int64x2x3_t val) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst3q_s64(ptr, val); - #else - simde_int64x2_private a[3] = { simde_int64x2_to_private(val.val[0]), - simde_int64x2_to_private(val.val[1]), - simde_int64x2_to_private(val.val[2]) }; - #if defined(SIMDE_SHUFFLE_VECTOR_) - __typeof__(a[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[0].values, a[1].values, 0, 2); - __typeof__(a[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[2].values, a[0].values, 0, 3); - __typeof__(a[0].values) r3 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[1].values, a[2].values, 1, 3); - simde_memcpy(ptr, &r1, sizeof(r1)); - 
simde_memcpy(&ptr[2], &r2, sizeof(r2)); - simde_memcpy(&ptr[4], &r3, sizeof(r3)); - #else - int64_t buf[6]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst3q_s64 - #define vst3q_s64(a, b) simde_vst3q_s64((a), (b)) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3q_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(48)], simde_uint8x16x3_t val) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst3q_u8(ptr, val); - #else - simde_uint8x16_private a_[3] = {simde_uint8x16_to_private(val.val[0]), - simde_uint8x16_to_private(val.val[1]), - simde_uint8x16_to_private(val.val[2])}; - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t a = a_[0].v128; - v128_t b = a_[1].v128; - v128_t c = a_[2].v128; - - // r0 = [a0, b0, a6, a1, b1, a7, a2, b2, a8, a3, b3, a9, a4, b4, a10, a5] - v128_t r0 = wasm_i8x16_shuffle(a, b, 0, 16, 6, 1, 17, 7, 2, 18, 8, 3, 19, 9, - 4, 20, 10, 5); - // m0 = [a0, b0, c0, a1, b1, c1, a2, b2, c2, a3, b3, c3, a4, b4, c4, a5] - v128_t m0 = wasm_i8x16_shuffle(r0, c, 0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, - 19, 12, 13, 20, 15); - wasm_v128_store(ptr, m0); - - // r1 = [b5, c5, b11, b6, c6, b12, b7, c7, b13, b8, c8, b14, b9, c9, b15, - // b10] - v128_t r1 = wasm_i8x16_shuffle(b, c, 5, 21, 11, 6, 22, 12, 7, 23, 13, 8, 24, - 14, 9, 25, 15, 10); - // m1 = [b5, c5, a6, b6, c6, a7, b7, c7, a8, b8, c8, a9, b9, c9, a10, b10] - v128_t m1 = wasm_i8x16_shuffle(r1, r0, 0, 1, 18, 3, 4, 21, 6, 7, 24, 9, 10, - 27, 12, 13, 30, 15); - wasm_v128_store(ptr + 16, m1); - - // r2 = [c10, a11, X, c11, a12, X, c12, a13, X, c13, a14, X, c14, a15, X, - // c15] - v128_t r2 = wasm_i8x16_shuffle(c, a, 10, 27, 0, 11, 28, 0, 12, 29, 0, 13, - 30, 0, 14, 31, 0, 15); - // m2 = [c10, a11, b11, c11, a12, b12, c12, a13, b13, c13, a14, b14, c14, - // a15, b15, c15] - v128_t m2 = wasm_i8x16_shuffle(r2, r1, 0, 1, 18, 3, 4, 21, 6, 7, 24, 9, 10, - 27, 12, 13, 30, 15); - wasm_v128_store(ptr + 32, m2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_[0].values, a_[1].values, - 0, 16, 6, 1, 17, 7, 2, 18, 8, 3, 19, 9, - 4, 20, 10, 5); - - __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(8, 16, r0, a_[2].values, - 0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, 19, 12, 13, 20, 15); - simde_memcpy(ptr, &m0, sizeof(m0)); - - __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_[1].values, a_[2].values, - 5, 21, 11, 6, 22, 12, 7, 23, 13, 8, 24, - 14, 9, 25, 15, 10); - - __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(8, 16, r1, r0, - 0, 1, 18, 3, 4, 21, 6, 7, 24, 9, 10, 27, 12, 13, 30, 15); - simde_memcpy(&ptr[16], &m1, sizeof(m1)); - - __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_[2].values, a_[0].values, - 10, 27, 0, 11, 28, 0, 12, 29, 0, 13, 30, 0, 14, 31, 0, 15); - - __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(8, 16, r2, r1, - 0, 1, 18, 3, 4, 21, 6, 7, 24, 9, 10, 27, 12, 13, 30, 15); - simde_memcpy(&ptr[32], &m2, sizeof(m2)); - #else - uint8_t buf[48]; - for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { - buf[i] = a_[i % 3].values[i / 3]; - } - simde_memcpy(ptr, buf, sizeof(buf)); - #endif - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst3q_u8 - #define vst3q_u8(a, b) simde_vst3q_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst3q_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_uint16x8x3_t val) { - #if 
[... deleted lines elided: remainder of the auto-generated NEON vst3q_* store polyfills in the bundled SIMDE header, including their scalar fallbacks and native-alias redefinitions ...]
-/* :: End simde/arm/neon/st3.h :: */
-/* :: Begin simde/arm/neon/st3_lane.h :: */
[... deleted lines elided: repeated MIT license header and auto-generated vst3_lane_* / vst3q_lane_* polyfills for the s8-s64, u8-u64, f16/f32/f64, p8/p16/p64, and bf16 element types ...]
-/* :: End simde/arm/neon/st3_lane.h :: */
-/* :: Begin simde/arm/neon/st4.h :: */
[... deleted lines elided: repeated MIT license header and auto-generated vst4_* / vst4q_* polyfills for the same element types ...]
-/* :: End simde/arm/neon/st4.h :: */
-/* :: Begin simde/arm/neon/st4_lane.h :: */
[... deleted lines elided: repeated MIT license header and auto-generated vst4_lane_* / vst4q_lane_* polyfills ...]
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst4q_lane_p8 - #define vst4q_lane_p8(a, b, c) simde_vst4q_lane_p8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst4q_lane_p16(simde_poly16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_poly16x8x4_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_CONSTIFY_8_NO_RESULT_(vst4q_lane_p16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_poly16x8_private r; - for (size_t i = 0 ; i < 4 ; i++) { - r = simde_poly16x8_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vst4q_lane_p16 - #define vst4q_lane_p16(a, b, c) simde_vst4q_lane_p16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst4q_lane_p64(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_poly64x2x4_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_CONSTIFY_2_NO_RESULT_(vst4q_lane_p64, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_poly64x2_private r; - for (size_t i = 0 ; i < 4 ; i++) { - r = simde_poly64x2_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vst4q_lane_p64 - #define vst4q_lane_p64(a, b, c) simde_vst4q_lane_p64((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst4_lane_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_bfloat16x4x4_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - SIMDE_CONSTIFY_4_NO_RESULT_(vst4_lane_bf16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_bfloat16x4_private r; - for (size_t i = 0 ; i < 4 ; i++) { - r = simde_bfloat16x4_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst4_lane_bf16 - #define vst4_lane_bf16(a, b, c) simde_vst4_lane_bf16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_vst4q_lane_bf16(simde_bfloat16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_bfloat16x8x4_t val, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_BF16) - SIMDE_CONSTIFY_8_NO_RESULT_(vst4q_lane_bf16, HEDLEY_UNREACHABLE(), lane, ptr, val); - #else - simde_bfloat16x8_private r; - for (size_t i = 0 ; i < 4 ; i++) { - r = simde_bfloat16x8_to_private(val.val[i]); - ptr[i] = r.values[lane]; - } - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vst4q_lane_bf16 - #define vst4q_lane_bf16(a, b, c) simde_vst4q_lane_bf16((a), (b), (c)) -#endif - -#endif /* !defined(SIMDE_BUG_INTEL_857088) */ - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_ST4_LANE_H) */ -/* :: End simde/arm/neon/st4_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/subhn.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or 
sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_NEON_SUBHN_H) -#define SIMDE_ARM_NEON_SUBHN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vsubhn_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubhn_s16(a, b); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - simde_int8x8_private r_; - simde_int8x16_private tmp_ = - simde_int8x16_to_private( - simde_vreinterpretq_s8_s16( - simde_vsubq_s16(a, b) - ) - ); - #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE - r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7, 9, 11, 13, 15); - #else - r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6, 8, 10, 12, 14); - #endif - return simde_int8x8_from_private(r_); - #else - return simde_vmovn_s16(simde_vshrq_n_s16(simde_vsubq_s16(a, b), 8)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubhn_s16 - #define vsubhn_s16(a, b) simde_vsubhn_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vsubhn_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubhn_s32(a, b); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - simde_int16x4_private r_; - simde_int16x8_private tmp_ = - simde_int16x8_to_private( - simde_vreinterpretq_s16_s32( - simde_vsubq_s32(a, b) - ) - ); - #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE - r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7); - #else - r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6); - #endif - return simde_int16x4_from_private(r_); - #else - return simde_vmovn_s32(simde_vshrq_n_s32(simde_vsubq_s32(a, b), 16)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubhn_s32 - #define vsubhn_s32(a, b) simde_vsubhn_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vsubhn_s64(simde_int64x2_t a, simde_int64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubhn_s64(a, b); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - simde_int32x2_private r_; - simde_int32x4_private tmp_ = - 
simde_int32x4_to_private( - simde_vreinterpretq_s32_s64( - simde_vsubq_s64(a, b) - ) - ); - #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE - r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3); - #else - r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2); - #endif - return simde_int32x2_from_private(r_); - #else - return simde_vmovn_s64(simde_vshrq_n_s64(simde_vsubq_s64(a, b), 32)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubhn_s64 - #define vsubhn_s64(a, b) simde_vsubhn_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vsubhn_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubhn_u16(a, b); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - simde_uint8x8_private r_; - simde_uint8x16_private tmp_ = - simde_uint8x16_to_private( - simde_vreinterpretq_u8_u16( - simde_vsubq_u16(a, b) - ) - ); - #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE - r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7, 9, 11, 13, 15); - #else - r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6, 8, 10, 12, 14); - #endif - return simde_uint8x8_from_private(r_); - #else - return simde_vmovn_u16(simde_vshrq_n_u16(simde_vsubq_u16(a, b), 8)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubhn_u16 - #define vsubhn_u16(a, b) simde_vsubhn_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x4_t -simde_vsubhn_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubhn_u32(a, b); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - simde_uint16x4_private r_; - simde_uint16x8_private tmp_ = - simde_uint16x8_to_private( - simde_vreinterpretq_u16_u32( - simde_vsubq_u32(a, b) - ) - ); - #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE - r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7); - #else - r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6); - #endif - return simde_uint16x4_from_private(r_); - #else - return simde_vmovn_u32(simde_vshrq_n_u32(simde_vsubq_u32(a, b), 16)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubhn_u32 - #define vsubhn_u32(a, b) simde_vsubhn_u32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x2_t -simde_vsubhn_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubhn_u64(a, b); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - simde_uint32x2_private r_; - simde_uint32x4_private tmp_ = - simde_uint32x4_to_private( - simde_vreinterpretq_u32_u64( - simde_vsubq_u64(a, b) - ) - ); - #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE - r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3); - #else - r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2); - #endif - return simde_uint32x2_from_private(r_); - #else - return simde_vmovn_u64(simde_vshrq_n_u64(simde_vsubq_u64(a, b), 32)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubhn_u64 - #define vsubhn_u64(a, b) simde_vsubhn_u64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_SUBHN_H) */ -/* :: End simde/arm/neon/subhn.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 
589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/subhn_high.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_SUBHN_HIGH_H) -#define SIMDE_ARM_NEON_SUBHN_HIGH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vsubhn_high_s16(r, a, b) vsubhn_high_s16((r), (a), (b)) -#else - #define simde_vsubhn_high_s16(r, a, b) simde_vcombine_s8(r, simde_vsubhn_s16(a, b)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubhn_high_s16 - #define vsubhn_high_s16(r, a, b) simde_vsubhn_high_s16((r), (a), (b)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vsubhn_high_s32(r, a, b) vsubhn_high_s32((r), (a), (b)) -#else - #define simde_vsubhn_high_s32(r, a, b) simde_vcombine_s16(r, simde_vsubhn_s32(a, b)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubhn_high_s32 - #define vsubhn_high_s32(r, a, b) simde_vsubhn_high_s32((r), (a), (b)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vsubhn_high_s64(r, a, b) vsubhn_high_s64((r), (a), (b)) -#else - #define simde_vsubhn_high_s64(r, a, b) simde_vcombine_s32(r, simde_vsubhn_s64(a, b)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubhn_high_s64 - #define vsubhn_high_s64(r, a, b) simde_vsubhn_high_s64((r), (a), (b)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vsubhn_high_u16(r, a, b) vsubhn_high_u16((r), (a), (b)) -#else - #define simde_vsubhn_high_u16(r, a, b) simde_vcombine_u8(r, simde_vsubhn_u16(a, b)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubhn_high_u16 - #define vsubhn_high_u16(r, a, b) simde_vsubhn_high_u16((r), (a), (b)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vsubhn_high_u32(r, a, b) vsubhn_high_u32((r), (a), (b)) -#else - #define simde_vsubhn_high_u32(r, a, b) simde_vcombine_u16(r, simde_vsubhn_u32(a, b)) -#endif -#if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubhn_high_u32 - #define vsubhn_high_u32(r, a, b) simde_vsubhn_high_u32((r), (a), (b)) -#endif - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vsubhn_high_u64(r, a, b) vsubhn_high_u64((r), (a), (b)) -#else - #define simde_vsubhn_high_u64(r, a, b) simde_vcombine_u32(r, simde_vsubhn_u64(a, b)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubhn_high_u64 - #define vsubhn_high_u64(r, a, b) simde_vsubhn_high_u64((r), (a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_SUBHN_HIGH_H) */ -/* :: End simde/arm/neon/subhn_high.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/subl_high.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2021 Décio Luiz Gazzoni Filho - */ - -#if !defined(SIMDE_ARM_NEON_SUBL_HIGH_H) -#define SIMDE_ARM_NEON_SUBL_HIGH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vsubl_high_s8(simde_int8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubl_high_s8(a, b); - #else - return simde_vsubq_s16(simde_vmovl_high_s8(a), simde_vmovl_high_s8(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubl_high_s8 - #define vsubl_high_s8(a, b) simde_vsubl_high_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vsubl_high_s16(simde_int16x8_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubl_high_s16(a, b); - #else - return simde_vsubq_s32(simde_vmovl_high_s16(a), simde_vmovl_high_s16(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubl_high_s16 - #define vsubl_high_s16(a, b) simde_vsubl_high_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vsubl_high_s32(simde_int32x4_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubl_high_s32(a, b); - #else - return simde_vsubq_s64(simde_vmovl_high_s32(a), simde_vmovl_high_s32(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubl_high_s32 - #define vsubl_high_s32(a, b) simde_vsubl_high_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vsubl_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubl_high_u8(a, b); - #else - return simde_vsubq_u16(simde_vmovl_high_u8(a), simde_vmovl_high_u8(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubl_high_u8 - #define vsubl_high_u8(a, b) simde_vsubl_high_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vsubl_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubl_high_u16(a, b); - #else - return simde_vsubq_u32(simde_vmovl_high_u16(a), simde_vmovl_high_u16(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubl_high_u16 - #define vsubl_high_u16(a, b) simde_vsubl_high_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vsubl_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubl_high_u32(a, b); - #else - return simde_vsubq_u64(simde_vmovl_high_u32(a), simde_vmovl_high_u32(b)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubl_high_u32 - #define vsubl_high_u32(a, b) simde_vsubl_high_u32((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_SUBL_HIGH_H) */ -/* :: End simde/arm/neon/subl_high.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/subw.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, 
to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_NEON_SUBW_H) -#define SIMDE_ARM_NEON_SUBW_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vsubw_s8(simde_int16x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubw_s8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vsubq_s16(a, simde_vmovl_s8(b)); - #else - simde_int16x8_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_int8x8_private b_ = simde_int8x8_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values -= a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubw_s8 - #define vsubw_s8(a, b) simde_vsubw_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vsubw_s16(simde_int32x4_t a, simde_int16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubw_s16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vsubq_s32(a, simde_vmovl_s16(b)); - #else - simde_int32x4_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - simde_int16x4_private b_ = simde_int16x4_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values -= a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubw_s16 - #define vsubw_s16(a, b) simde_vsubw_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vsubw_s32(simde_int64x2_t a, simde_int32x2_t b) { - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubw_s32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vsubq_s64(a, simde_vmovl_s32(b)); - #else - simde_int64x2_private r_; - simde_int64x2_private a_ = simde_int64x2_to_private(a); - simde_int32x2_private b_ = simde_int32x2_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values -= a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubw_s32 - #define vsubw_s32(a, b) simde_vsubw_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vsubw_u8(simde_uint16x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubw_u8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vsubq_u16(a, simde_vmovl_u8(b)); - #else - simde_uint16x8_private r_; - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - simde_uint8x8_private b_ = simde_uint8x8_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values -= a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubw_u8 - #define vsubw_u8(a, b) simde_vsubw_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vsubw_u16(simde_uint32x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubw_u16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vsubq_u32(a, simde_vmovl_u16(b)); - #else - simde_uint32x4_private r_; - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - simde_uint16x4_private b_ = simde_uint16x4_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values -= a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubw_u16 - #define vsubw_u16(a, b) simde_vsubw_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vsubw_u32(simde_uint64x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsubw_u32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vsubq_u64(a, simde_vmovl_u32(b)); - #else - simde_uint64x2_private r_; - simde_uint64x2_private a_ = simde_uint64x2_to_private(a); - simde_uint32x2_private b_ = simde_uint32x2_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values -= a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i]; - } - #endif - - return 
simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vsubw_u32 - #define vsubw_u32(a, b) simde_vsubw_u32((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_SUBW_H) */ -/* :: End simde/arm/neon/subw.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/subw_high.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_NEON_SUBW_HIGH_H) -#define SIMDE_ARM_NEON_SUBW_HIGH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vsubw_high_s8(simde_int16x8_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubw_high_s8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vsubq_s16(a, simde_vmovl_high_s8(b)); - #else - simde_int16x8_private r_; - simde_int16x8_private a_ = simde_int16x8_to_private(a); - simde_int8x16_private b_ = simde_int8x16_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values -= a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; - } - #endif - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubw_high_s8 - #define vsubw_high_s8(a, b) simde_vsubw_high_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vsubw_high_s16(simde_int32x4_t a, simde_int16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubw_high_s16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vsubq_s32(a, simde_vmovl_high_s16(b)); - #else - simde_int32x4_private r_; - simde_int32x4_private a_ = simde_int32x4_to_private(a); - simde_int16x8_private b_ = 
simde_int16x8_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values -= a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; - } - #endif - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubw_high_s16 - #define vsubw_high_s16(a, b) simde_vsubw_high_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vsubw_high_s32(simde_int64x2_t a, simde_int32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubw_high_s32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vsubq_s64(a, simde_vmovl_high_s32(b)); - #else - simde_int64x2_private r_; - simde_int64x2_private a_ = simde_int64x2_to_private(a); - simde_int32x4_private b_ = simde_int32x4_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values -= a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; - } - #endif - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubw_high_s32 - #define vsubw_high_s32(a, b) simde_vsubw_high_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint16x8_t -simde_vsubw_high_u8(simde_uint16x8_t a, simde_uint8x16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubw_high_u8(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vsubq_u16(a, simde_vmovl_high_u8(b)); - #else - simde_uint16x8_private r_; - simde_uint16x8_private a_ = simde_uint16x8_to_private(a); - simde_uint8x16_private b_ = simde_uint8x16_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values -= a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; - } - #endif - - return simde_uint16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubw_high_u8 - #define vsubw_high_u8(a, b) simde_vsubw_high_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint32x4_t -simde_vsubw_high_u16(simde_uint32x4_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubw_high_u16(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vsubq_u32(a, simde_vmovl_high_u16(b)); - #else - simde_uint32x4_private r_; - simde_uint32x4_private a_ = simde_uint32x4_to_private(a); - simde_uint16x8_private b_ = simde_uint16x8_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values -= a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; - } - #endif - - 
return simde_uint32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubw_high_u16 - #define vsubw_high_u16(a, b) simde_vsubw_high_u16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vsubw_high_u32(simde_uint64x2_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsubw_high_u32(a, b); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_vsubq_u64(a, simde_vmovl_high_u32(b)); - #else - simde_uint64x2_private r_; - simde_uint64x2_private a_ = simde_uint64x2_to_private(a); - simde_uint32x4_private b_ = simde_uint32x4_to_private(b); - - #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.values, b_.values); - r_.values -= a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; - } - #endif - - return simde_uint64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsubw_high_u32 - #define vsubw_high_u32(a, b) simde_vsubw_high_u32((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_SUBW_HIGH_H) */ -/* :: End simde/arm/neon/subw_high.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/sudot_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_SUDOT_LANE_H) -#define SIMDE_ARM_NEON_SUDOT_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vsudot_lane_s32(simde_int32x2_t r, simde_int8x8_t a, simde_uint8x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int32x2_t result; - simde_int32x2_private r_ = simde_int32x2_to_private(r); - simde_int8x8_private a_ = simde_int8x8_to_private(a); - simde_uint8x8_private b_ = simde_uint8x8_to_private(b); - - for (int i = 0 ; i < 2 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_int32x2_from_private(r_); - - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_MATMUL_INT8) - #define simde_vsudot_lane_s32(r, a, b, lane) vsudot_lane_s32((r), (a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vsudot_lane_s32 - #define vsudot_lane_s32(r, a, b, lane) simde_vsudot_lane_s32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vsudot_laneq_s32(simde_int32x2_t r, simde_int8x8_t a, simde_uint8x16_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int32x2_t result; - simde_int32x2_private r_ = simde_int32x2_to_private(r); - simde_int8x8_private a_ = simde_int8x8_to_private(a); - simde_uint8x16_private b_ = simde_uint8x16_to_private(b); - - for (int i = 0 ; i < 2 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_int32x2_from_private(r_); - - return result; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_MATMUL_INT8) - #define simde_vsudot_laneq_s32(r, a, b, lane) vsudot_laneq_s32((r), (a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsudot_laneq_s32 - #define vsudot_laneq_s32(r, a, b, lane) simde_vsudot_laneq_s32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vsudotq_laneq_s32(simde_int32x4_t r, simde_int8x16_t a, simde_uint8x16_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int32x4_t result; - simde_int32x4_private r_ = simde_int32x4_to_private(r); - simde_int8x16_private a_ = simde_int8x16_to_private(a); - simde_uint8x16_private b_ = simde_uint8x16_to_private(b); - - for(int i = 0 ; i < 4 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for(int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_int32x4_from_private(r_); - return result; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_MATMUL_INT8) - #define simde_vsudotq_laneq_s32(r, a, b, lane) 
vsudotq_laneq_s32((r), (a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vsudotq_laneq_s32 - #define vsudotq_laneq_s32(r, a, b, lane) simde_vsudotq_laneq_s32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vsudotq_lane_s32(simde_int32x4_t r, simde_int8x16_t a, simde_uint8x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int32x4_t result; - simde_int32x4_private r_ = simde_int32x4_to_private(r); - simde_int8x16_private a_ = simde_int8x16_to_private(a); - simde_uint8x8_private b_ = simde_uint8x8_to_private(b); - - for(int i = 0 ; i < 4 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for(int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_int32x4_from_private(r_); - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_MATMUL_INT8) - #define simde_vsudotq_lane_s32(r, a, b, lane) vsudotq_lane_s32((r), (a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vsudotq_lane_s32 - #define vsudotq_lane_s32(r, a, b, lane) simde_vsudotq_lane_s32((r), (a), (b), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_SUDOT_LANE_H) */ -/* :: End simde/arm/neon/sudot_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/tbl.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_TBL_H) -#define SIMDE_ARM_NEON_TBL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vtbl1_u8(simde_uint8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtbl1_u8(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde_uint8x16_private - r_, - a_ = simde_uint8x16_to_private(simde_vcombine_u8(a, a)), - b_ = simde_uint8x16_to_private(simde_vcombine_u8(b, b)); - - r_.v128 = wasm_i8x16_swizzle(a_.v128, b_.v128); - r_.v128 = wasm_v128_and(r_.v128, wasm_u8x16_lt(b_.v128, wasm_i8x16_splat(8))); - - return simde_vget_low_u8(simde_uint8x16_from_private(r_)); - #else - simde_uint8x8_private - r_, - a_ = simde_uint8x8_to_private(a), - b_ = simde_uint8x8_to_private(b); - - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - r_.m64 = _mm_shuffle_pi8(a_.m64, _mm_or_si64(b_.m64, _mm_cmpgt_pi8(b_.m64, _mm_set1_pi8(7)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] < 8) ? a_.values[b_.values[i]] : 0; - } - #endif - - return simde_uint8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtbl1_u8 - #define vtbl1_u8(a, b) simde_vtbl1_u8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vtbl1_s8(simde_int8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtbl1_s8(a, b); - #else - return simde_vreinterpret_s8_u8(simde_vtbl1_u8(simde_vreinterpret_u8_s8(a), simde_vreinterpret_u8_s8(b))); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vtbl1_s8 - #define vtbl1_s8(a, b) simde_vtbl1_s8((a), (b)) -#endif - -#if !defined(SIMDE_BUG_INTEL_857088) - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint8x8_t -simde_vtbl2_u8(simde_uint8x8x2_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vtbl2_u8(a, b); - #else - simde_uint8x8_private - r_, - a_[2] = { simde_uint8x8_to_private(a.val[0]), simde_uint8x8_to_private(a.val[1]) }, - b_ = simde_uint8x8_to_private(b); - - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - __m128i a128 = _mm_set_epi64(a_[1].m64, a_[0].m64); - __m128i b128 = _mm_set1_epi64(b_.m64); - __m128i r128 = _mm_shuffle_epi8(a128, _mm_or_si128(b128, _mm_cmpgt_epi8(b128, _mm_set1_epi8(15)))); - r_.m64 = _mm_movepi64_pi64(r128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = (b_.values[i] < 16) ? 
[Remainder of the deleted vendored SIMDE amalgamation, removed verbatim along with the rest of the simde dependency: the tail of simde/arm/neon/tbl.h (vtbl2/vtbl3/vtbl4 and poly8 table-lookup fallbacks), all of simde/arm/neon/tbx.h (vtbx1 through vtbx4 extended table lookups, each with an SSE4.1 _mm_shuffle_epi8 path and a scalar loop), all of simde/arm/neon/trn.h including trn1.h and trn2.h (even/odd lane-interleave loops for every 64- and 128-bit element type, plus the combined vtrn/vtrnq wrappers built from trn1 and trn2), and the opening of simde/arm/neon/uqadd.h (the MSVC ARM64 workaround macros and the scalar saturating signed-plus-unsigned add fallbacks vuqaddb_s8, vuqaddh_s16, vuqadds_s32, and the start of vuqaddd_s64), whose deletion continues below. Each section carries its own repeated MIT license header and "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY" banner.]
This is - * just the first thing that worked, and I don't feel like messing - * with it now. */ - int64_t r; - - if (a < 0) { - uint64_t na = HEDLEY_STATIC_CAST(uint64_t, -a); - if (na > b) { - uint64_t t = na - b; - r = (t > (HEDLEY_STATIC_CAST(uint64_t, INT64_MAX) + 1)) ? INT64_MIN : -HEDLEY_STATIC_CAST(int64_t, t); - } else { - uint64_t t = b - na; - r = (t > (HEDLEY_STATIC_CAST(uint64_t, INT64_MAX) )) ? INT64_MAX : HEDLEY_STATIC_CAST(int64_t, t); - } - } else { - uint64_t ua = HEDLEY_STATIC_CAST(uint64_t, a); - r = ((INT64_MAX - ua) < b) ? INT64_MAX : HEDLEY_STATIC_CAST(int64_t, ua + b); - } - - return r; - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuqaddd_s64 - #define vuqaddd_s64(a, b) simde_vuqaddd_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x8_t -simde_vuqadd_s8(simde_int8x8_t a, simde_uint8x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuqadd_s8(a, b); - #else - simde_int8x8_private - r_, - a_ = simde_int8x8_to_private(a); - simde_uint8x8_private b_ = simde_uint8x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vuqaddb_s8(a_.values[i], b_.values[i]); - } - - return simde_int8x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuqadd_s8 - #define vuqadd_s8(a, b) simde_vuqadd_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x4_t -simde_vuqadd_s16(simde_int16x4_t a, simde_uint16x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuqadd_s16(a, b); - #else - simde_int16x4_private - r_, - a_ = simde_int16x4_to_private(a); - simde_uint16x4_private b_ = simde_uint16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vuqaddh_s16(a_.values[i], b_.values[i]); - } - - return simde_int16x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuqadd_s16 - #define vuqadd_s16(a, b) simde_vuqadd_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vuqadd_s32(simde_int32x2_t a, simde_uint32x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuqadd_s32(a, b); - #else - simde_int32x2_private - r_, - a_ = simde_int32x2_to_private(a); - simde_uint32x2_private b_ = simde_uint32x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vuqadds_s32(a_.values[i], b_.values[i]); - } - - return simde_int32x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuqadd_s32 - #define vuqadd_s32(a, b) simde_vuqadd_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x1_t -simde_vuqadd_s64(simde_int64x1_t a, simde_uint64x1_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuqadd_s64(a, b); - #else - simde_int64x1_private - r_, - a_ = simde_int64x1_to_private(a); - simde_uint64x1_private b_ = simde_uint64x1_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vuqaddd_s64(a_.values[i], b_.values[i]); - } - - return simde_int64x1_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuqadd_s64 - #define vuqadd_s64(a, b) simde_vuqadd_s64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int8x16_t -simde_vuqaddq_s8(simde_int8x16_t a, simde_uint8x16_t b) { - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuqaddq_s8(a, b); - #else - simde_int8x16_private - r_, - a_ = simde_int8x16_to_private(a); - simde_uint8x16_private b_ = simde_uint8x16_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vuqaddb_s8(a_.values[i], b_.values[i]); - } - - return simde_int8x16_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuqaddq_s8 - #define vuqaddq_s8(a, b) simde_vuqaddq_s8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int16x8_t -simde_vuqaddq_s16(simde_int16x8_t a, simde_uint16x8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuqaddq_s16(a, b); - #else - simde_int16x8_private - r_, - a_ = simde_int16x8_to_private(a); - simde_uint16x8_private b_ = simde_uint16x8_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vuqaddh_s16(a_.values[i], b_.values[i]); - } - - return simde_int16x8_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuqaddq_s16 - #define vuqaddq_s16(a, b) simde_vuqaddq_s16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vuqaddq_s32(simde_int32x4_t a, simde_uint32x4_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuqaddq_s32(a, b); - #else - simde_int32x4_private - r_, - a_ = simde_int32x4_to_private(a); - simde_uint32x4_private b_ = simde_uint32x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vuqadds_s32(a_.values[i], b_.values[i]); - } - - return simde_int32x4_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuqaddq_s32 - #define vuqaddq_s32(a, b) simde_vuqaddq_s32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vuqaddq_s64(simde_int64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vuqaddq_s64(a, b); - #else - simde_int64x2_private - r_, - a_ = simde_int64x2_to_private(a); - simde_uint64x2_private b_ = simde_uint64x2_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vuqaddd_s64(a_.values[i], b_.values[i]); - } - - return simde_int64x2_from_private(r_); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vuqaddq_s64 - #define vuqaddq_s64(a, b) simde_vuqaddq_s64((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_UQADD_H) */ -/* :: End simde/arm/neon/uqadd.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/usdot.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_USDOT_H) -#define SIMDE_ARM_NEON_USDOT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vusdot_s32(simde_int32x2_t r, simde_uint8x8_t a, simde_int8x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_MATMUL_INT8) - return vusdot_s32(r, a, b); - #else - simde_int32x2_private r_; - simde_uint8x8_private a_ = simde_uint8x8_to_private(a); - simde_int8x8_private b_ = simde_int8x8_to_private(b); - for (int i = 0 ; i < 2 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]); - } - r_.values[i] = acc; - } - return simde_vadd_s32(r, simde_int32x2_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vusdot_s32 - #define vusdot_s32(r, a, b) simde_vusdot_s32((r), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vusdotq_s32(simde_int32x4_t r, simde_uint8x16_t a, simde_int8x16_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_MATMUL_INT8) - return vusdotq_s32(r, a, b); - #else - simde_int32x4_private r_; - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - simde_int8x16_private b_ = simde_int8x16_to_private(b); - for (int i = 0 ; i < 4 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]); - } - r_.values[i] = acc; - } - return simde_vaddq_s32(r, simde_int32x4_from_private(r_)); - #endif -} -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vusdotq_s32 - #define vusdotq_s32(r, a, b) simde_vusdotq_s32((r), (a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_USDOT_H) */ -/* :: End simde/arm/neon/usdot.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/usdot_lane.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission 
notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_ARM_NEON_USDOT_LANE_H) -#define SIMDE_ARM_NEON_USDOT_LANE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vusdot_lane_s32(simde_int32x2_t r, simde_uint8x8_t a, simde_int8x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int32x2_t result; - simde_int32x2_private r_ = simde_int32x2_to_private(r); - simde_uint8x8_private a_ = simde_uint8x8_to_private(a); - simde_int8x8_private b_ = simde_int8x8_to_private(b); - - for (int i = 0 ; i < 2 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_int32x2_from_private(r_); - - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_MATMUL_INT8) - #define simde_vusdot_lane_s32(r, a, b, lane) vusdot_lane_s32((r), (a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vusdot_lane_s32 - #define vusdot_lane_s32(r, a, b, lane) simde_vusdot_lane_s32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x2_t -simde_vusdot_laneq_s32(simde_int32x2_t r, simde_uint8x8_t a, simde_int8x16_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int32x2_t result; - simde_int32x2_private r_ = simde_int32x2_to_private(r); - simde_uint8x8_private a_ = simde_uint8x8_to_private(a); - simde_int8x16_private b_ = simde_int8x16_to_private(b); - - for (int i = 0 ; i < 2 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for (int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_int32x2_from_private(r_); - - return result; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_MATMUL_INT8) - #define simde_vusdot_laneq_s32(r, a, b, lane) vusdot_laneq_s32((r), (a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vusdot_laneq_s32 - #define vusdot_laneq_s32(r, a, b, lane) simde_vusdot_laneq_s32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vusdotq_laneq_s32(simde_int32x4_t r, simde_uint8x16_t a, simde_int8x16_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int32x4_t result; - simde_int32x4_private r_ = simde_int32x4_to_private(r); - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - 
simde_int8x16_private b_ = simde_int8x16_to_private(b); - - for(int i = 0 ; i < 4 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for(int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_int32x4_from_private(r_); - return result; -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_MATMUL_INT8) - #define simde_vusdotq_laneq_s32(r, a, b, lane) vusdotq_laneq_s32((r), (a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vusdotq_laneq_s32 - #define vusdotq_laneq_s32(r, a, b, lane) simde_vusdotq_laneq_s32((r), (a), (b), (lane)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vusdotq_lane_s32(simde_int32x4_t r, simde_uint8x16_t a, simde_int8x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int32x4_t result; - simde_int32x4_private r_ = simde_int32x4_to_private(r); - simde_uint8x16_private a_ = simde_uint8x16_to_private(a); - simde_int8x8_private b_ = simde_int8x8_to_private(b); - - for(int i = 0 ; i < 4 ; i++) { - int32_t acc = 0; - SIMDE_VECTORIZE_REDUCTION(+:acc) - for(int j = 0 ; j < 4 ; j++) { - const int idx_b = j + (lane << 2); - const int idx_a = j + (i << 2); - acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); - } - r_.values[i] += acc; - } - - result = simde_int32x4_from_private(r_); - return result; -} -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_MATMUL_INT8) - #define simde_vusdotq_lane_s32(r, a, b, lane) vusdotq_lane_s32((r), (a), (b), (lane)) -#endif -#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vusdotq_lane_s32 - #define vusdotq_lane_s32(r, a, b, lane) simde_vusdotq_lane_s32((r), (a), (b), (lane)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_USDOT_LANE_H) */ -/* :: End simde/arm/neon/usdot_lane.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/neon/xar.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Atharva Nimbalkar - */ - -#if !defined(SIMDE_ARM_NEON_XAR_H) -#define SIMDE_ARM_NEON_XAR_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde_uint64x2_t -simde_vxarq_u64(simde_uint64x2_t a, simde_uint64x2_t b, const int d) - SIMDE_REQUIRE_CONSTANT_RANGE(d, 0, 63) { - simde_uint64x2_private - r_, - t = simde_uint64x2_to_private(simde_veorq_u64(a,b)); - - SIMDE_VECTORIZE - for (size_t i=0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = ((t.values[i] >> d) | (t.values[i] << (64 - d))); - } - - return simde_uint64x2_from_private(r_); -} -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) - #define simde_vxarq_u64(a, b, d) vxarq_u64((a), (b), (d)) -#endif -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) - #undef vxarq_u64 - #define vxarq_u64(a, b, d) simde_vxarq_u64((a), (b), (d)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_ARM_NEON_XAR_H) */ -/* :: End simde/arm/neon/xar.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#endif /* SIMDE_ARM_NEON_H */ -/* :: End simde/arm/neon.h :: */ diff --git a/src/simde/arm/sve.h b/src/simde/arm/sve.h deleted file mode 100644 index db7897fdc..000000000 --- a/src/simde/arm/sve.h +++ /dev/null @@ -1,18926 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_H) -#define SIMDE_ARM_SVE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/types.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -/* TODO: SVE2 is going to be a bit awkward with this setup. We currently - * either use SVE vectors or assume that the vector length is known at - * compile-time. For CPUs which provide SVE but not SVE2 we're going - * to be getting scalable vectors, so we may need to loop through them. - * - * Currently I'm thinking we'll have a separate function for non-SVE - * types. We can call that function in a loop from an SVE version, - * and we can call it once from a resolver. - * - * Unfortunately this is going to mean a lot of boilerplate for SVE, - * which already has several variants of a lot of functions (*_z, *_m, - * etc.), plus overloaded functions in C++ and generic selectors in C. - * - * Anyways, all this means that we're going to need to always define - * the portable types. - * - * The good news is that at least we don't have to deal with - * to/from_private functions; since the no-SVE versions will only be - * called with non-SVE params. */ - -#if !defined(SIMDE_ARM_SVE_TYPES_H) -#define SIMDE_ARM_SVE_TYPES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-common.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_COMMON_H) -#define SIMDE_COMMON_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/hedley.h :: */ -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) -#if defined(HEDLEY_VERSION) -# undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 16 - -#if defined(HEDLEY_STRINGIFY_EX) -# undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -# undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -# undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(HEDLEY_CONCAT) -# undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) - -#if defined(HEDLEY_CONCAT3_EX) -# undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(HEDLEY_CONCAT3) -# undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(HEDLEY_VERSION_ENCODE) -# undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -# undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -# undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -# undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -# undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -# undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -# undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 
10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -# undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(HEDLEY_INTEL_VERSION) -# undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -# undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -# undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) -# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -# undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -# undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -# undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -# undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif 
defined(__SUNPRO_CC) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -# undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_ARM_VERSION) -# undef HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(HEDLEY_ARM_VERSION_CHECK) -# undef HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(HEDLEY_ARM_VERSION) -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IBM_VERSION) -# undef HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(HEDLEY_IBM_VERSION_CHECK) -# undef HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(HEDLEY_IBM_VERSION) -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_VERSION) -# undef HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -# if (__TI_COMPILER_VERSION__ >= 16000000) -# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -# endif -#endif - -#if defined(HEDLEY_TI_VERSION_CHECK) -# undef HEDLEY_TI_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_VERSION) -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION) -# undef HEDLEY_TI_CL2000_VERSION -#endif -#if 
defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) -# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) -# undef HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL2000_VERSION) -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION) -# undef HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) -# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION_CHECK) -# undef HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL430_VERSION) -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION) -# undef HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) -# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) -# undef HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_ARMCL_VERSION) -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION) -# undef HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) -# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) -# undef HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL6X_VERSION) -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION) -# undef HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) -# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) -# undef HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL7X_VERSION) -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION) -# undef HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) -# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if 
defined(HEDLEY_TI_CLPRU_VERSION_CHECK) -# undef HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CLPRU_VERSION) -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_CRAY_VERSION) -# undef HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) -# if defined(_RELEASE_PATCHLEVEL) -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) -# else -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) -# endif -#endif - -#if defined(HEDLEY_CRAY_VERSION_CHECK) -# undef HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(HEDLEY_CRAY_VERSION) -# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IAR_VERSION) -# undef HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) -# if __VER__ > 1000 -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) -# else -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) -# endif -#endif - -#if defined(HEDLEY_IAR_VERSION_CHECK) -# undef HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(HEDLEY_IAR_VERSION) -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TINYC_VERSION) -# undef HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) -# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) -#endif - -#if defined(HEDLEY_TINYC_VERSION_CHECK) -# undef HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_DMC_VERSION) -# undef HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) -# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if defined(HEDLEY_DMC_VERSION_CHECK) -# undef HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(HEDLEY_DMC_VERSION) -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION) -# undef HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) -# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION_CHECK) -# undef HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(HEDLEY_COMPCERT_VERSION) -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PELLES_VERSION) -# undef HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) -# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if 
defined(HEDLEY_PELLES_VERSION_CHECK) -# undef HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(HEDLEY_PELLES_VERSION) -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION) -# undef HEDLEY_MCST_LCC_VERSION -#endif -#if defined(__LCC__) && defined(__LCC_MINOR__) -# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) -# undef HEDLEY_MCST_LCC_VERSION_CHECK -#endif -#if defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_GCC_VERSION) -# undef HEDLEY_GCC_VERSION -#endif -#if \ - defined(HEDLEY_GNUC_VERSION) && \ - !defined(__clang__) && \ - !defined(HEDLEY_INTEL_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_ARM_VERSION) && \ - !defined(HEDLEY_CRAY_VERSION) && \ - !defined(HEDLEY_TI_VERSION) && \ - !defined(HEDLEY_TI_ARMCL_VERSION) && \ - !defined(HEDLEY_TI_CL430_VERSION) && \ - !defined(HEDLEY_TI_CL2000_VERSION) && \ - !defined(HEDLEY_TI_CL6X_VERSION) && \ - !defined(HEDLEY_TI_CL7X_VERSION) && \ - !defined(HEDLEY_TI_CLPRU_VERSION) && \ - !defined(__COMPCERT__) && \ - !defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION -#endif - -#if defined(HEDLEY_GCC_VERSION_CHECK) -# undef HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(HEDLEY_GCC_VERSION) -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_HAS_ATTRIBUTE) -# undef HEDLEY_HAS_ATTRIBUTE -#endif -#if \ - defined(__has_attribute) && \ - ( \ - (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ - ) -# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if \ - defined(__has_cpp_attribute) && \ - defined(__cplusplus) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) -# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#elif \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION) && \ - 
[Deleted vendored SIMDe headers, reproduced here only in part: simde/hedley.h (portable compiler-version checks plus attribute and diagnostic macros such as HEDLEY_DEPRECATED, HEDLEY_NO_RETURN, HEDLEY_LIKELY, HEDLEY_DIAGNOSTIC_PUSH/POP), simde/simde-detect-clang.h (infers the upstream clang version from __has_warning/__has_attribute feature probes, since Apple's reported version numbers are unreliable), and simde/simde-arch.h (architecture and SIMD ISA detection macros, SIMDE_ARCH_*). All of this auto-generated third-party content is removed by this diff.]
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
*/ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. 
*/ -#if \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ -#endif - -/* This warning needs a lot of work. It is triggered if all you do is - * pass the value to memcpy/__builtin_memcpy, or if you initialize a - * member of the union, even if that member takes up the entire union. - * Last tested with clang-10, hopefully things will improve in the - * future; if clang fixes this I'd love to enable it. */ -#if \ - HEDLEY_HAS_WARNING("-Wconditional-uninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ -#endif - -/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which - * will is false. However, SIMDe uses these operations exclusively - * for things like _mm_cmpeq_ps, for which we really do want to check - * for equality (or inequality). - * - * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro - * which just wraps a check in some code do disable this diagnostic I'd - * be happy to accept it. */ -#if \ - HEDLEY_HAS_WARNING("-Wfloat-equal") || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ -#endif - -/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. - * If Hedley can't find an implementation it will preprocess to - * nothing, which means there will be a trailing semi-colon. */ -#if HEDLEY_HAS_WARNING("-Wextra-semi") - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") -#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ -#endif - -/* We do use a few variadic macros, which technically aren't available - * until C99 and C++11, but every compiler I'm aware of has supported - * them for much longer. That said, usage is isolated to the test - * suite and compilers known to support them. */ -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) - #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#endif - -/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro - * before we can access certain SIMD intrinsics, but this diagnostic - * warns about it being a reserved name. It is a reserved name, but - * it's reserved for the compiler and we are using it to convey - * information to the compiler. - * - * This is also used when enabling native aliases since we don't get to - * choose the macro names. 
*/ -#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#endif - -/* Similar to above; types like simde__m128i are reserved due to the - * double underscore, but we didn't choose them, Intel did. */ -#if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ -#endif - -/* clang 3.8 warns about the packed attribute being unnecessary when - * used in the _mm_loadu_* functions. That *may* be true for version - * 3.8, but for later versions it is crucial in order to make unaligned - * access safe. */ -#if HEDLEY_HAS_WARNING("-Wpacked") - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ -#endif - -/* Triggered when assigning a float to a double implicitly. We use - * explicit casts in SIMDe, this is only used in the test suite. */ -#if HEDLEY_HAS_WARNING("-Wdouble-promotion") - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -/* Several compilers treat conformant array parameters as VLAs. We - * test to make sure we're in C mode (C++ doesn't support CAPs), and - * that the version of the standard supports CAPs. We also reject - * some buggy compilers like MSVC (the logic is in Hedley if you want - * to take a look), but with certain warnings enabled some compilers - * still like to emit a diagnostic. */ -#if HEDLEY_HAS_WARNING("-Wvla") - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ -#endif - -/* If you add an unused attribute to a function and don't use it, clang - * may emit this. 
*/ -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpass-failed") - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpadded") - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ -#endif - -#if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ -#endif - -#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ -#endif - -/* clang will emit this warning when we use C99 extensions whan not in - * C99 mode, even though it does support this. In such cases we check - * the compiler and version first, so we know it's not a problem. */ -#if HEDLEY_HAS_WARNING("-Wc99-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -#endif - -/* Similar problm as above; we rely on some basic C99 support, but clang - * has started warning obut this even in C17 mode with -Weverything. */ -#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ -#endif - -/* https://github.com/simd-everywhere/simde/issues/277 */ -#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ -#endif - -/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS - * to silence, but you have to do that before including anything and - * that would require reordering includes. */ -#if defined(_MSC_VER) - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ -#endif - -/* Some compilers, such as clang, may use `long long` for 64-bit - * integers, but `long long` triggers a diagnostic with - * -Wc++98-compat-pedantic which says 'long long' is incompatible with - * C++98. 
*/ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ - _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ -#endif - -/* Some problem as above */ -#if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ -#endif - -/* emscripten emits this whenever stdin/stdout/stderr is used in a - * macro. */ -#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ -#endif - -/* Clang uses C11 generic selections to implement some AltiVec - * functions, which triggers this diagnostic when not compiling - * in C11 mode */ -#if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ -#endif - -/* Clang sometimes triggers this warning in macros in the AltiVec and - * NEON headers, or due to missing functions. */ -#if HEDLEY_HAS_WARNING("-Wvector-conversion") - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") - /* For NEON, the situation with -Wvector-conversion in clang < 10 is - * bad enough that we just disable the warning altogether. On x86, - * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ - #if \ - (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ - SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ -#endif - -/* Prior to 5.0, clang didn't support disabling diagnostics in - * statement exprs. As a result, some macros we use don't - * properly silence warnings. 
*/ -#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ -#endif - -/* SLEEF triggers this a *lot* in their headers */ -#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#endif - -/* GCC emits this under some circumstances when using __int128 */ -#if HEDLEY_GCC_VERSION_CHECK(4,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -#endif - -/* MSVC doesn't like (__assume(0), code) and will warn about code being - * unreachable, but we want it there because not all compilers - * understand the unreachable macro and will complain if it is missing. - * I'm planning on adding a new macro to Hedley to handle this a bit - * more elegantly, but until then... */ -#if defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) -#elif defined(__clang__) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ -#endif - -/* This is a false positive from GCC in a few places. */ -#if HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif - -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#else - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ -#endif - -/* Some native functions on E2K with instruction set < v6 are declared - * as deprecated due to inefficiency. Still they are more efficient - * than SIMDe implementation. So we're using them, and switching off - * these deprecation warnings. 
*/ -#if defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") -#else -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS -#endif - -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ - HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ - SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ - SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ - SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ - SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ - SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ - SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ - -#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ -/* :: End simde/simde-diagnostic.h :: */ - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SVML) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) - #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) - #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BITALG) - #define SIMDE_X86_AVX512BITALG_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI) - #define SIMDE_X86_AVX512VBMI_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI2) - #define SIMDE_X86_AVX512VBMI2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VNNI) - #define SIMDE_X86_AVX512VNNI_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) - #define SIMDE_X86_AVX5124VNNIW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512CD) - #define SIMDE_X86_AVX512CD_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512DQ) - #define SIMDE_X86_AVX512DQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VL) - #define SIMDE_X86_AVX512VL_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BW) - #define SIMDE_X86_AVX512BW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && !defined(SIMDE_X86_AVX512FP16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512FP16) - #define SIMDE_X86_AVX512FP16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BF16) - #define SIMDE_X86_AVX512BF16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512F) - #define SIMDE_X86_AVX512F_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_NATIVE -#endif - -#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_FMA) - #define SIMDE_X86_FMA_NATIVE - #endif -#endif -#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX2) - #define SIMDE_X86_AVX2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX) - #define SIMDE_X86_AVX_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_XOP) - #define SIMDE_X86_XOP_NATIVE - #endif -#endif -#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_2) - #define SIMDE_X86_SSE4_2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_1) - #define SIMDE_X86_SSE4_1_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSSE3) - #define SIMDE_X86_SSSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE3) - #define SIMDE_X86_SSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_AES_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AES) - #define SIMDE_X86_AES_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE2) - #define SIMDE_X86_SSE2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE) - #define SIMDE_X86_SSE_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_MMX) - #define SIMDE_X86_MMX_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_GFNI) - #define SIMDE_X86_GFNI_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_PCLMUL) - #define SIMDE_X86_PCLMUL_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) - #define SIMDE_X86_VPCLMULQDQ_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_F16C) - #define SIMDE_X86_F16C_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86) && 
(defined(__INTEL_COMPILER) || (HEDLEY_MSVC_VERSION_CHECK(14, 20, 0) && !defined(__clang__))) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(push) - #pragma warning(disable:4799) -#endif - -#if \ - defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) || defined(SIMDE_X86_SVML_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_1_NATIVE) - #include -#elif defined(SIMDE_X86_SSSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE_NATIVE) - #include -#elif defined(SIMDE_X86_MMX_NATIVE) - #include -#endif - -#if defined(SIMDE_X86_XOP_NATIVE) - #if defined(_MSC_VER) - #include - #else - #include - #endif -#endif - -#if defined(SIMDE_X86_AES_NATIVE) - #include -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(pop) -#endif - -#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) - #define SIMDE_ARM_NEON_A64V8_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) - #define SIMDE_ARM_NEON_A32V8_NATIVE - #endif -#endif -#if defined(__ARM_ACLE) - #include -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) - #define SIMDE_ARM_NEON_A32V7_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include - #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - #include - #endif -#endif - -#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_SVE) - #define SIMDE_ARM_SVE_NATIVE - #include - #endif -#endif - -#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_SIMD128) - #define SIMDE_WASM_SIMD128_NATIVE - #endif -#endif - -#if !defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) && !defined(SIMDE_WASM_RELAXED_SIMD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_RELAXED_SIMD) - #define SIMDE_WASM_RELAXED_SIMD_NATIVE - #endif -#endif -#if defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) - #include -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) - #define SIMDE_POWER_ALTIVEC_P9_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) - #define 
[... deletion hunk continues: the remainder of the vendored SIMDe headers removed along with the SIMDe dependency — simde-features.h (ISA feature detection and native-mode macros for POWER AltiVec, zArch z/Vector, MIPS Loongson/MSA and LoongArch LSX/LASX; SIMDE_NATURAL_VECTOR_SIZE selection; SIMDE_*_ENABLE_NATIVE_ALIASES toggles; the IEEE-754 storage assumption) and the opening of simde-math.h (optional SLEEF hooks; dispatch of the C99 math functions to compiler builtins, <cmath>, or <math.h>; INFINITY/NAN/PI and FLT/DBL limit macros; fpclassify/fpclass helpers; nextafter and the standard elementary functions; cdfnorm, cdfnorminv, erfinv, erfcinv approximations; rad/deg conversion; saturated integer add/sub helpers) ...]
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - case 32: result = func_name(__VA_ARGS__, 32); break; \ - case 33: result = func_name(__VA_ARGS__, 33); break; \ - case 34: result = func_name(__VA_ARGS__, 34); break; \ - case 35: result = func_name(__VA_ARGS__, 35); break; \ - case 36: result = func_name(__VA_ARGS__, 36); break; \ - case 37: result = func_name(__VA_ARGS__, 37); break; \ - case 38: result = func_name(__VA_ARGS__, 38); break; \ - case 39: result = func_name(__VA_ARGS__, 39); break; \ - case 40: result = func_name(__VA_ARGS__, 40); break; \ - case 41: result = func_name(__VA_ARGS__, 41); break; \ - case 42: result = func_name(__VA_ARGS__, 42); break; \ - case 43: result = func_name(__VA_ARGS__, 43); break; \ - case 44: result = func_name(__VA_ARGS__, 44); break; \ - case 45: result = func_name(__VA_ARGS__, 45); break; \ - case 46: result = func_name(__VA_ARGS__, 46); break; \ - case 47: result = func_name(__VA_ARGS__, 47); break; \ - case 48: result = func_name(__VA_ARGS__, 48); break; \ - case 49: result = func_name(__VA_ARGS__, 49); break; \ - case 50: result = func_name(__VA_ARGS__, 50); break; \ - case 51: result = func_name(__VA_ARGS__, 51); break; \ - case 52: result = func_name(__VA_ARGS__, 52); break; \ - case 53: result = func_name(__VA_ARGS__, 53); break; \ - case 54: result = func_name(__VA_ARGS__, 54); break; \ - case 55: result = func_name(__VA_ARGS__, 55); break; \ - case 56: result = func_name(__VA_ARGS__, 56); break; \ - case 57: result = func_name(__VA_ARGS__, 57); break; \ - case 58: result = func_name(__VA_ARGS__, 58); break; \ - case 59: result = func_name(__VA_ARGS__, 59); break; \ - case 60: result = func_name(__VA_ARGS__, 60); break; \ - case 61: result = func_name(__VA_ARGS__, 61); break; \ - case 62: 
result = func_name(__VA_ARGS__, 62); break; \ - case 63: result = func_name(__VA_ARGS__, 63); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - case 32: func_name(__VA_ARGS__, 32); break; \ - case 33: func_name(__VA_ARGS__, 33); break; \ - case 34: func_name(__VA_ARGS__, 34); break; \ - case 35: func_name(__VA_ARGS__, 35); break; \ - case 36: func_name(__VA_ARGS__, 36); break; \ - case 37: func_name(__VA_ARGS__, 37); break; \ - case 38: func_name(__VA_ARGS__, 38); break; \ 
- case 39: func_name(__VA_ARGS__, 39); break; \ - case 40: func_name(__VA_ARGS__, 40); break; \ - case 41: func_name(__VA_ARGS__, 41); break; \ - case 42: func_name(__VA_ARGS__, 42); break; \ - case 43: func_name(__VA_ARGS__, 43); break; \ - case 44: func_name(__VA_ARGS__, 44); break; \ - case 45: func_name(__VA_ARGS__, 45); break; \ - case 46: func_name(__VA_ARGS__, 46); break; \ - case 47: func_name(__VA_ARGS__, 47); break; \ - case 48: func_name(__VA_ARGS__, 48); break; \ - case 49: func_name(__VA_ARGS__, 49); break; \ - case 50: func_name(__VA_ARGS__, 50); break; \ - case 51: func_name(__VA_ARGS__, 51); break; \ - case 52: func_name(__VA_ARGS__, 52); break; \ - case 53: func_name(__VA_ARGS__, 53); break; \ - case 54: func_name(__VA_ARGS__, 54); break; \ - case 55: func_name(__VA_ARGS__, 55); break; \ - case 56: func_name(__VA_ARGS__, 56); break; \ - case 57: func_name(__VA_ARGS__, 57); break; \ - case 58: func_name(__VA_ARGS__, 58); break; \ - case 59: func_name(__VA_ARGS__, 59); break; \ - case 60: func_name(__VA_ARGS__, 60); break; \ - case 61: func_name(__VA_ARGS__, 61); break; \ - case 62: func_name(__VA_ARGS__, 62); break; \ - case 63: func_name(__VA_ARGS__, 63); break; \ - default: default_case; break; \ - } \ - } while (0) - -HEDLEY_DIAGNOSTIC_POP - -#endif -/* :: End simde/simde-constify.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-align.h :: */ -/* Alignment - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - ********************************************************************** - * - * This is portability layer which should help iron out some - * differences across various compilers, as well as various verisons of - * C and C++. - * - * It was originally developed for SIMD Everywhere - * (), but since its only - * dependency is Hedley (, also CC0) - * it can easily be used in other projects, so please feel free to do - * so. - * - * If you do use this in your project, please keep a link to SIMDe in - * your code to remind you where to report any bugs and/or check for - * updated versions. - * - * # API Overview - * - * The API has several parts, and most macros have a few variations. - * There are APIs for declaring aligned fields/variables, optimization - * hints, and run-time alignment checks. - * - * Briefly, macros ending with "_TO" take numeric values and are great - * when you know the value you would like to use. Macros ending with - * "_LIKE", on the other hand, accept a type and are used when you want - * to use the alignment of a type instead of hardcoding a value. - * - * Documentation for each section of the API is inline. - * - * True to form, MSVC is the main problem and imposes several - * limitations on the effectiveness of the APIs. Detailed descriptions - * of the limitations of each macro are inline, but in general: - * - * * On C11+ or C++11+ code written using this API will work. The - * ASSUME macros may or may not generate a hint to the compiler, but - * that is only an optimization issue and will not actually cause - * failures. - * * If you're using pretty much any compiler other than MSVC, - * everything should basically work as well as in C11/C++11. 
- */ - -#if !defined(SIMDE_ALIGN_H) -#define SIMDE_ALIGN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* I know this seems a little silly, but some non-hosted compilers - * don't have stddef.h, so we try to accomodate them. */ -#if !defined(SIMDE_ALIGN_SIZE_T_) - #if defined(__SIZE_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__SIZE_T_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #else - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #endif -#endif - -#if !defined(SIMDE_ALIGN_INTPTR_T_) - #if defined(__INTPTR_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ - #elif defined(__PTRDIFF_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ - #elif defined(__PTRDIFF_T_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #else - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #endif -#endif - -#if defined(SIMDE_ALIGN_DEBUG) - #if defined(__cplusplus) - #include - #else - #include - #endif -#endif - -/* SIMDE_ALIGN_OF(Type) - * - * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or - * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. - * It isn't defined everywhere (only when the compiler has some alignof- - * like feature we can use to implement it), but it should work in most - * modern compilers, as well as C11 and C++11. - * - * If we can't find an implementation for SIMDE_ALIGN_OF then the macro - * will not be defined, so if you can handle that situation sensibly - * you may need to sprinkle some ifdefs into your code. - */ -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (0 && HEDLEY_HAS_FEATURE(c_alignof)) - #define SIMDE_ALIGN_OF(Type) _Alignof(Type) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) - #define SIMDE_ALIGN_OF(Type) alignof(Type) -#elif \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - defined(__IBM__ALIGNOF__) || \ - defined(__clang__) - #define SIMDE_ALIGN_OF(Type) __alignof__(Type) -#elif \ - HEDLEY_IAR_VERSION_CHECK(8,40,0) - #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(19,0,0) - /* Probably goes back much further, but MS takes down their old docs. - * If you can verify that this works in earlier versions please let - * me know! */ - #define SIMDE_ALIGN_OF(Type) __alignof(Type) -#endif - -/* SIMDE_ALIGN_MAXIMUM: - * - * This is the maximum alignment that the compiler supports. You can - * define the value prior to including SIMDe if necessary, but in that - * case *please* submit an issue so we can add the platform to the - * detection code. - * - * Most compilers are okay with types which are aligned beyond what - * they think is the maximum, as long as the alignment is a power - * of two. 
Older versions of MSVC is the exception, so we need to cap - * the alignment requests at values that the implementation supports. - * - * XL C/C++ will accept values larger than 16 (which is the alignment - * of an AltiVec vector), but will not reliably align to the larger - * value, so so we cap the value at 16 there. - * - * If the compiler accepts any power-of-two value within reason then - * this macro should be left undefined, and the SIMDE_ALIGN_CAP - * macro will just return the value passed to it. */ -#if !defined(SIMDE_ALIGN_MAXIMUM) - #if defined(HEDLEY_MSVC_VERSION) - #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) - // Visual studio 2017 and newer does not need a max - #else - #if defined(_M_IX86) || defined(_M_AMD64) - #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 - #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) - /* VS 2010 is really a guess based on Wikipedia; if anyone can - * test with old VS versions I'd really appreciate it. */ - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 - #else - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif - #elif defined(_M_ARM) || defined(_M_ARM64) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 - #endif - #endif - #elif defined(HEDLEY_IBM_VERSION) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif -#endif - -/* You can mostly ignore these; they're intended for internal use. - * If you do need to use them please let me know; if they fulfill - * a common use case I'll probably drop the trailing underscore - * and make them part of the public API. */ -#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) - #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 - #define SIMDE_ALIGN_64_ 32 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 - #define SIMDE_ALIGN_64_ 16 - #define SIMDE_ALIGN_32_ 16 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 - #define SIMDE_ALIGN_64_ 8 - #define SIMDE_ALIGN_32_ 8 - #define SIMDE_ALIGN_16_ 8 - #define SIMDE_ALIGN_8_ 8 - #else - #error Max alignment expected to be >= 8 - #endif -#else - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 -#endif - -/** - * SIMDE_ALIGN_CAP(Alignment) - * - * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. - */ -#if defined(SIMDE_ALIGN_MAXIMUM) - #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) -#else - #define SIMDE_ALIGN_CAP(Alignment) (Alignment) -#endif - -/* SIMDE_ALIGN_TO(Alignment) - * - * SIMDE_ALIGN_TO is used to declare types or variables. It basically - * maps to the align attribute in most compilers, the align declspec - * in MSVC, or _Alignas/alignas in C11/C++11. - * - * Example: - * - * struct i32x4 { - * SIMDE_ALIGN_TO(16) int32_t values[4]; - * } - * - * Limitations: - * - * MSVC requires that the Alignment parameter be numeric; you can't do - * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is - * unfortunate because that's really how the LIKE macros are - * implemented, and I am not aware of a way to get anything like this - * to work without using the C11/C++11 keywords. 
- * - * It also means that we can't use SIMDE_ALIGN_CAP to limit the - * alignment to the value specified, which MSVC also requires, so on - * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. - * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, - * but should be safe to use on MSVC. - * - * All this is to say that, if you want your code to work on MSVC, you - * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of - * SIMDE_ALIGN_TO(8/16/32/64). - */ -#if \ - HEDLEY_HAS_ATTRIBUTE(aligned) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) -#elif \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) - #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) - #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) - /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); - * the alignment passed to the declspec has to be an integer. */ - #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE -#endif -#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) -#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) -#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) -#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) - -/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) - * - * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's - * std::assume_aligned, or __builtin_assume_aligned. It tells the - * compiler to assume that the provided pointer is aligned to an - * `Alignment`-byte boundary. - * - * If you define SIMDE_ALIGN_DEBUG prior to including this header then - * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't - * integrate with NDEBUG in this header, but it may be a good idea to - * put something like this in your code: - * - * #if !defined(NDEBUG) - * #define SIMDE_ALIGN_DEBUG - * #endif - * #include <.../simde-align.h> - */ -#if \ - HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ - HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ - __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ - __assume_aligned(simde_assume_aligned_t_, Alignment); \ - simde_assume_aligned_t_; \ - })) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) -#else - #if defined(__cplusplus) - template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) - #else - HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) - #endif - { - HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); - return ptr; - } - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) - #else - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) - #endif -#endif - -#if !defined(SIMDE_ALIGN_DEBUG) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) -#else - #include - #if defined(__cplusplus) - template - static HEDLEY_ALWAYS_INLINE - T* - simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #else - static HEDLEY_ALWAYS_INLINE - void* - simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #endif - { - if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { - fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", - file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), - HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), - HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); - } - - return ptr; - } - - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) - #else - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) - #endif -#endif - -/* SIMDE_ALIGN_LIKE(Type) - * SIMDE_ALIGN_LIKE_#(Type) - * - * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros - * except instead of an integer they take a type; basically, it's just - * a more convenient way to do something like: - * - * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - * - * The versions with a numeric suffix will fall back 
on using a numeric - * value in the event we can't use SIMDE_ALIGN_OF(Type). This is - * mainly for MSVC, where __declspec(align()) can't handle anything - * other than hard-coded numeric values. - */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) - #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) -#else - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 -#endif - -/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) - * - * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a - * type instead of a numeric value. */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) - #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) -#endif - -/* SIMDE_ALIGN_CAST(Type, Pointer) - * - * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try - * to silence warnings that some compilers may produce if you try - * to assign to a type with increased alignment requirements. - * - * Note that it does *not* actually attempt to tell the compiler that - * the pointer is aligned like the destination should be; that's the - * job of the next macro. This macro is necessary for stupid APIs - * like _mm_loadu_si128 where the input is a __m128i* but the function - * is specifically for data which isn't necessarily aligned to - * _Alignof(__m128i). - */ -#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ - Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_r_; \ - })) -#else - #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) -#endif - -/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) - * - * This is sort of like a combination of a reinterpret_cast and a - * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell - * the compiler that the pointer is aligned like the specified type - * and casts the pointer to the specified type while suppressing any - * warnings from the compiler about casting to a type with greater - * alignment requirements. - */ -#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) - -#endif /* !defined(SIMDE_ALIGN_H) */ -/* :: End simde/simde-align.h :: */ - -/* In some situations, SIMDe has to make large performance sacrifices - * for small increases in how faithfully it reproduces an API, but - * only a relatively small number of users will actually need the API - * to be completely accurate. The SIMDE_FAST_* options can be used to - * disable these trade-offs. - * - * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or - * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to - * enable some optimizations. Using -ffast-math and/or - * -ffinite-math-only will also enable the relevant options. If you - * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) - #define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) - #if defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_NANS - #elif defined(__FINITE_MATH_ONLY__) - #if __FINITE_MATH_ONLY__ - #define SIMDE_FAST_NANS - #endif - #endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_CONVERSION_RANGE -#endif - -/* Due to differences across platforms, sometimes it can be much - * faster for us to allow spurious floating point exceptions, - * or to no generate them when we should. 
*/ -#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_EXCEPTIONS -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) - #if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ - (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) - #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") - #else - #define SIMDE_REQUIRE_CONSTANT(arg) - #endif -#else - #define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) - /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which - * starts with a double-underscore. This is a system header so we have no - * control over it, but since it's a macro it will emit a diagnostic which - * prevents compilation with -Werror. */ - #if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ - _Static_assert(expr, message); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) - #endif -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) - #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#endif - -/* Statement exprs */ -#if \ - HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ - HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) -#endif - -/* This is just a convenience macro to make it easy to call a single - * function with a specific diagnostic disabled. 
*/ -#if defined(SIMDE_STATEMENT_EXPR_) - #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ - SIMDE_STATEMENT_EXPR_(({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - diagnostic \ - (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#endif - -#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) - #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") -#endif - -#if \ - (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) -# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -# define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -# if \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SCALAR -# define SIMDE_VECTOR_SUBSCRIPT -# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -# define SIMDE_VECTOR_SUBSCRIPT -# elif \ - HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# elif HEDLEY_HAS_ATTRIBUTE(vector_size) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SUBSCRIPT -# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) -# define SIMDE_VECTOR_SCALAR -# endif -# endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) - HEDLEY_DIAGNOSTIC_PUSH - /* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -# endif -# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif - -# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#else -# define SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_SAFELEN(l) -# define SIMDE_VECTORIZE_REDUCTION(r) -# define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if defined(SIMDE_NO_INLINE) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#elif defined(SIMDE_CONSTRAINED_COMPILATION) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static -#else -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if \ - HEDLEY_HAS_ATTRIBUTE(unused) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ - -#if defined(_MSC_VER) -# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -# define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -# define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -# define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -# define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# elif defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__s390x__) || defined(__zarch__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. 
*/ -# elif defined(_WIN32) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(sun) || defined(__sun) /* Solaris */ -# include -# if defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__APPLE__) -# include -# if defined(__LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -# include -# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -# include -# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# endif -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) - #define simde_bswap64(v) _byteswap_uint64(v) -#else - SIMDE_FUNCTION_ATTRIBUTES - uint64_t - simde_bswap64(uint64_t v) { - return - ((v & (((uint64_t) 0xff) << 56)) >> 56) | - ((v & (((uint64_t) 0xff) << 48)) >> 40) | - ((v & (((uint64_t) 0xff) << 40)) >> 24) | - ((v & (((uint64_t) 0xff) << 32)) >> 8) | - ((v & (((uint64_t) 0xff) << 24)) << 8) | - ((v & (((uint64_t) 0xff) << 16)) << 24) | - ((v & (((uint64_t) 0xff) << 8)) << 40) | - ((v & (((uint64_t) 0xff) )) << 56); - } -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -# error Unknown byte order; please file a bug -#else -# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -# define simde_endian_bswap64_be(value) simde_bswap64(value) -# define simde_endian_bswap64_le(value) (value) -# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -# define simde_endian_bswap64_be(value) (value) -# define simde_endian_bswap64_le(value) simde_bswap64(value) -# endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. 
*/ - -#if !defined(SIMDE_FLOAT32_TYPE) -# define SIMDE_FLOAT32_TYPE float -# define SIMDE_FLOAT32_C(value) value##f -#else -# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -# define SIMDE_FLOAT64_TYPE double -# define SIMDE_FLOAT64_C(value) value -#else -# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if defined(SIMDE_POLY8_TYPE) -# undef SIMDE_POLY8_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY8_TYPE poly8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value)) -#else -# define SIMDE_POLY8_TYPE uint8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value)) -#endif -typedef SIMDE_POLY8_TYPE simde_poly8; - -#if defined(SIMDE_POLY16_TYPE) -# undef SIMDE_POLY16_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY16_TYPE poly16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value)) -#else -# define SIMDE_POLY16_TYPE uint16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value)) -#endif -typedef SIMDE_POLY16_TYPE simde_poly16; - -#if defined(SIMDE_POLY64_TYPE) -# undef SIMDE_POLY64_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_POLY64_TYPE poly64_t -# define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull)) -#else -# define SIMDE_POLY64_TYPE uint64_t -# define SIMDE_POLY64_C(value) value ## ull -#endif -typedef SIMDE_POLY64_TYPE simde_poly64; - -#if defined(SIMDE_POLY128_TYPE) -# undef SIMDE_POLY128_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) -# define SIMDE_POLY128_TYPE poly128_t -# define SIMDE_POLY128_C(value) value -#elif defined(__SIZEOF_INT128__) -# define SIMDE_POLY128_TYPE __int128 -# define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value)) -#else -# define SIMDE_POLY128_TYPE uint64_t -# define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1 -#endif -typedef SIMDE_POLY128_TYPE simde_poly128; - -#if defined(__cplusplus) - typedef bool simde_bool; -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - typedef _Bool simde_bool; -#elif defined(bool) - typedef bool simde_bool; -#else - #include - typedef bool simde_bool; -#endif - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -# define SIMDE_CONVERT_FTOI(T,v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) \ - HEDLEY_DIAGNOSTIC_POP -#else -# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) -#else - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -# define 
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -# define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -# define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -# if \ - defined(HEDLEY_PGI_VERSION) || \ - defined(HEDLEY_MSVC_VERSION) -# define SIMDE_STDC_HOSTED 1 -# else -# define SIMDE_STDC_HOSTED 0 -# endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) - #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) - #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) - #endif -#endif -#if !defined(simde_memset) - #if HEDLEY_HAS_BUILTIN(__builtin_memset) - #define simde_memset(s, c, n) __builtin_memset(s, c, n) - #endif -#endif -#if !defined(simde_memcmp) - #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) - #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) - #endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) - #if !defined(SIMDE_NO_STRING_H) - #if defined(__has_include) - #if !__has_include() - #define SIMDE_NO_STRING_H - #endif - #elif (SIMDE_STDC_HOSTED == 0) - #define SIMDE_NO_STRING_H - #endif - #endif - - #if !defined(SIMDE_NO_STRING_H) - #include - #if !defined(simde_memcpy) - #define simde_memcpy(dest, src, n) memcpy(dest, src, n) - #endif - #if !defined(simde_memset) - #define simde_memset(s, c, n) memset(s, c, n) - #endif - #if !defined(simde_memcmp) - #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) - #endif - #else - /* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. 
*/ - #if !defined(simde_memcpy) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memcpy_(void* dest, const void* src, size_t len) { - char* dest_ = HEDLEY_STATIC_CAST(char*, dest); - char* src_ = HEDLEY_STATIC_CAST(const char*, src); - for (size_t i = 0 ; i < len ; i++) { - dest_[i] = src_[i]; - } - } - #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) - #endif - - #if !defined(simde_memset) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memset_(void* s, int c, size_t len) { - char* s_ = HEDLEY_STATIC_CAST(char*, s); - char c_ = HEDLEY_STATIC_CAST(char, c); - for (size_t i = 0 ; i < len ; i++) { - s_[i] = c_[i]; - } - } - #define simde_memset(s, c, n) simde_memset_(s, c, n) - #endif - - #if !defined(simde_memcmp) - SIMDE_FUCTION_ATTRIBUTES - int - simde_memcmp_(const void *s1, const void *s2, size_t n) { - unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); - unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); - for (size_t i = 0 ; i < len ; i++) { - if (s1_[i] != s2_[i]) { - return (int) (s1_[i] - s2_[i]); - } - } - return 0; - } - #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) - #endif - #endif -#endif - -/*** Functions that quiet a signaling NaN ***/ - -static HEDLEY_INLINE -double -simde_math_quiet(double x) { - uint64_t tmp, mask; - if (!simde_math_isnan(x)) { - return x; - } - simde_memcpy(&tmp, &x, 8); - mask = 0x7ff80000; - mask <<= 32; - tmp |= mask; - simde_memcpy(&x, &tmp, 8); - return x; -} - -static HEDLEY_INLINE -float -simde_math_quietf(float x) { - uint32_t tmp; - if (!simde_math_isnanf(x)) { - return x; - } - simde_memcpy(&tmp, &x, 4); - tmp |= 0x7fc00000lu; - simde_memcpy(&x, &tmp, 4); - return x; -} - -#if defined(FE_ALL_EXCEPT) - #define SIMDE_HAVE_FENV_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_FENV_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_FENV_H -#endif - -#if defined(EXIT_FAILURE) - #define SIMDE_HAVE_STDLIB_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_STDLIB_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_STDLIB_H -#endif - -#if defined(__has_include) -# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -# include -# elif __has_include() -# include -# endif -# if __has_include() -# include -# endif -#elif SIMDE_STDC_HOSTED == 1 -# include -# include -#endif - -#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ - static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ - T_To \ - Name (T_From value) { \ - T_To r; \ - simde_memcpy(&r, &value, sizeof(r)); \ - return r; \ - } - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/check.h :: */ -/* Check (assertions) - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -#if !defined(_WIN32) -# define SIMDE_SIZE_MODIFIER "z" -# define SIMDE_CHAR_MODIFIER "hh" -# define SIMDE_SHORT_MODIFIER "h" -#else -# if defined(_M_X64) || defined(__amd64__) -# define SIMDE_SIZE_MODIFIER "I64" -# else -# define SIMDE_SIZE_MODIFIER "" -# endif -# define SIMDE_CHAR_MODIFIER "" -# define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) -# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ -# define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -# if defined(__has_include) -# if __has_include() -# include -# endif -# elif defined(SIMDE_STDC_HOSTED) -# if SIMDE_STDC_HOSTED == 1 -# include -# endif -# elif defined(__STDC_HOSTED__) -# if __STDC_HOSTETD__ == 1 -# include -# endif -# endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/debug-trap.h :: */ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define simde_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define simde_trap() __debugbreak() -# endif -#endif -#if !defined(simde_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define simde_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define simde_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define simde_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void simde_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define simde_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define simde_trap() raise(SIGTRAP) -# else -# define simde_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -# define simde_dbg_assert(expr) do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -# define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ -/* :: End simde/debug-trap.h :: */ - - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -# if defined(EOF) -# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) -# else -# define simde_errorf(format, ...) (simde_trap()) -# endif - HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -# if defined(SIMDE_CHECK_FAIL_DEFINED) -# define simde_assert(expr) -# else -# if defined(HEDLEY_ASSUME) -# define simde_assert(expr) HEDLEY_ASSUME(expr) -# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) -# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) -# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) -# define simde_assert(expr) __assume(expr) -# else -# define simde_assert(expr) -# endif -# endif -# define simde_assert_true(expr) simde_assert(expr) -# define simde_assert_false(expr) simde_assert(!(expr)) -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) -# define simde_assert_double_equal(a, b, precision) -# define simde_assert_string_equal(a, b) -# define simde_assert_string_not_equal(a, b) -# define simde_assert_memory_equal(size, a, b) -# define simde_assert_memory_not_equal(size, a, b) -#else -# define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ - -(simde_tmp_a_ - simde_tmp_b_) : \ - (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# include -# define simde_assert_string_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ - simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_string_not_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ - simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ - simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) \ - simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) \ - simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) \ - simde_assert_type(float, "f", a, op, b) 
-#define simde_assert_double(a, op, b) \ - simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void*, "p", a, op, b) - -#define simde_assert_int8(a, op, b) \ - simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) \ - simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) \ - simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) \ - simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ -/* :: End simde/check.h :: */ - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether the operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * we use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long lonsg are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long. 
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. - * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-f16.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if !defined(SIMDE_FLOAT16_H) -#define SIMDE_FLOAT16_H - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* Portable version which should work on pretty much any compiler. - * Obviously you can't rely on compiler support for things like - * conversion to/from 32-bit floats, so make sure you always use the - * functions and macros in this file! - * - * The portable implementations are (heavily) based on CC0 code by - * Fabian Giesen: (see also - * ). - * I have basically just modified it to get rid of some UB (lots of - * aliasing, right shifting a negative value), use fixed-width types, - * and work in C. */ -#define SIMDE_FLOAT16_API_PORTABLE 1 -/* _Float16, per C standard (TS 18661-3; - * ). */ -#define SIMDE_FLOAT16_API_FLOAT16 2 -/* clang >= 6.0 supports __fp16 as an interchange format on all - * targets, but only allows you to use them for arguments and return - * values on targets which have defined an ABI. We get around the - * restriction by wrapping the __fp16 in a struct, but we can't do - * that on Arm since it would break compatibility with the NEON F16 - * functions. */ -#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 -/* This is basically __fp16 as specified by Arm, where arugments and - * return values are raw __fp16 values not structs. */ -#define SIMDE_FLOAT16_API_FP16 4 - -/* Choosing an implementation. This is a bit rough, but I don't have - * any ideas on how to improve it. If you do, patches are definitely - * welcome. */ -#if !defined(SIMDE_FLOAT16_API) - #if defined(__ARM_FP16_FORMAT_IEEE) && (defined(SIMDE_ARM_NEON_FP16) || defined(__ARM_FP16_ARGS)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 - #elif !defined(__EMSCRIPTEN__) && !(defined(__clang__) && defined(SIMDE_ARCH_POWER)) && \ - !(defined(HEDLEY_MSVC_VERSION) && defined(__clang__)) && \ - !(defined(SIMDE_ARCH_MIPS) && defined(__clang__)) && \ - !(defined(__clang__) && defined(SIMDE_ARCH_RISCV64)) && ( \ - defined(SIMDE_X86_AVX512FP16_NATIVE) || \ - (defined(SIMDE_ARCH_X86_SSE2) && HEDLEY_GCC_VERSION_CHECK(12,0,0)) || \ - (defined(SIMDE_ARCH_AARCH64) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !defined(__cplusplus)) || \ - ((defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0)) || \ - (!(defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(6,0,0))) - /* We haven't found a better way to detect this. It seems like defining - * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then - * checking for defined(FLT16_MAX) should work, but both gcc and - * clang will define the constants even if _Float16 is not - * supported. Ideas welcome. 
*/ - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 - #elif defined(__FLT16_MIN__) && \ - (defined(__clang__) && \ - (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) \ - && !defined(SIMDE_ARCH_RISCV64)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI - #else - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE - #endif -#endif - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 - typedef _Float16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #if !defined(__cplusplus) - #define SIMDE_FLOAT16_C(value) value##f16 - #else - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(_Float16, (value)) - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - typedef struct { __fp16 value; } simde_float16; - #if defined(SIMDE_STATEMENT_EXPR_) && !defined(SIMDE_TESTS_H) - #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) - #else - #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) - #define SIMDE_FLOAT16_IS_SCALAR 1 - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 - typedef __fp16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - typedef struct { uint16_t value; } simde_float16; -#else - #error No 16-bit floating point API. -#endif - -#if \ - defined(SIMDE_VECTOR_OPS) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) - #define SIMDE_FLOAT16_VECTOR -#endif - -/* Reinterpret -- you *generally* shouldn't need these, they're really - * intended for internal use. However, on x86 half-precision floats - * get stuffed into a __m128i/__m256i, so it may be useful. */ - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - #define SIMDE_NANHF simde_uint16_as_float16(0x7E00) // a quiet Not-a-Number - #define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) - #define SIMDE_NINFINITYHF simde_uint16_as_float16(0xFC00) -#else - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF SIMDE_FLOAT16_C(__builtin_nanf16("")) - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_FLOAT16_C(SIMDE_MATH_NAN) - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(__builtin_inf16()) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-__builtin_inf16()) - #else - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-SIMDE_MATH_INFINITY) - #endif - #else - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF __builtin_nanf16("") - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_MATH_NAN - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF __builtin_inf16() - #define SIMDE_NINFINITYHF -(__builtin_inf16()) - #else - #define SIMDE_INFINITYHF HEDLEY_STATIC_CAST(simde_float16, SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF HEDLEY_STATIC_CAST(simde_float16, -SIMDE_MATH_INFINITY) - #endif - #endif -#endif - -/* Conversion -- convert between single-precision and half-precision - * floats. 
*/ -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float16 -simde_float16_from_float32 (simde_float32 value) { - simde_float16 res; - - #if \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) - res = HEDLEY_STATIC_CAST(simde_float16, value); - #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) - res.value = HEDLEY_STATIC_CAST(__fp16, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint32_t f32u = simde_float32_as_uint32(value); - static const uint32_t f32u_infty = UINT32_C(255) << 23; - static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; - static const uint32_t denorm_magic = - ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; - uint16_t f16u; - - uint32_t sign = f32u & (UINT32_C(1) << 31); - f32u ^= sign; - - /* NOTE all the integer compares in this function cast the operands - * to signed values to help compilers vectorize to SSE2, which lacks - * unsigned comparison instructions. This is fine since all - * operands are below 0x80000000 (we clear the sign bit). */ - - if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ - f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ - } else { /* (De)normalized number or zero */ - if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ - /* use a magic value to align our 10 mantissa bits at the bottom of - * the float. as long as FP addition is round-to-nearest-even this - * just works. */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); - - /* and one integer subtract of the bias later, we have our final float! */ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); - } else { - uint32_t mant_odd = (f32u >> 13) & 1; - - /* update exponent, rounding bias part 1 */ - f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); - /* rounding bias part 2 */ - f32u += mant_odd; - /* take the bits! */ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); - } - } - - f16u |= sign >> 16; - res = simde_uint16_as_float16(f16u); - #endif - - return res; -} - -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float32 -simde_float16_to_float32 (simde_float16 value) { - simde_float32 res; - - #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) - res = HEDLEY_STATIC_CAST(simde_float32, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint16_t half = simde_float16_as_uint16(value); - const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); - const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ - uint32_t f32u; - - f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ - uint32_t exp = shifted_exp & f32u; /* just the exponent */ - f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ - - /* handle exponent special cases */ - if (exp == shifted_exp) /* Inf/NaN? */ - f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ - else if (exp == 0) { /* Zero/Denormal? 
*/ - f32u += (1) << 23; /* extra exp adjust */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ - } - - f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ - res = simde_uint32_as_float32(f32u); - #endif - - return res; -} - -#ifdef SIMDE_FLOAT16_C - #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) -#else - #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) -#endif - -#if !defined(simde_isinfhf) && defined(simde_math_isinff) - #define simde_isinfhf(a) simde_math_isinff(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnanhf) && defined(simde_math_isnanf) - #define simde_isnanhf(a) simde_math_isnanf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnormalhf) && defined(simde_math_isnormalf) - #define simde_isnormalhf(a) simde_math_isnormalf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_issubnormalhf) && defined(simde_math_issubnormalf) - #define simde_issubnormalhf(a) simde_math_issubnormalf(simde_float16_to_float32(a)) -#endif - -#define simde_fpclassifyhf(a) simde_math_fpclassifyf(simde_float16_to_float32(a)) - -static HEDLEY_INLINE -uint8_t -simde_fpclasshf(simde_float16 v, const int imm8) { - uint16_t bits = simde_float16_as_uint16(v); - uint8_t negative = (bits >> 15) & 1; - uint16_t const ExpMask = 0x7C00; // [14:10] - uint16_t const MantMask = 0x03FF; // [9:0] - uint8_t exponent_all_ones = ((bits & ExpMask) == ExpMask); - uint8_t exponent_all_zeros = ((bits & ExpMask) == 0); - uint8_t mantissa_all_zeros = ((bits & MantMask) == 0); - uint8_t zero = exponent_all_zeros & mantissa_all_zeros; - uint8_t signaling_bit = (bits >> 9) & 1; - - uint8_t result = 0; - uint8_t snan = exponent_all_ones & (!mantissa_all_zeros) & (!signaling_bit); - uint8_t qnan = exponent_all_ones & (!mantissa_all_zeros) & signaling_bit; - uint8_t positive_zero = (!negative) & zero; - uint8_t negative_zero = negative & zero; - uint8_t positive_infinity = (!negative) & exponent_all_ones & mantissa_all_zeros; - uint8_t negative_infinity = negative & exponent_all_ones & mantissa_all_zeros; - uint8_t denormal = exponent_all_zeros & (!mantissa_all_zeros); - uint8_t finite_negative = negative & (!exponent_all_ones) & (!zero); - result = (((imm8 >> 0) & qnan) | \ - ((imm8 >> 1) & positive_zero) | \ - ((imm8 >> 2) & negative_zero) | \ - ((imm8 >> 3) & positive_infinity) | \ - ((imm8 >> 4) & negative_infinity) | \ - ((imm8 >> 5) & denormal) | \ - ((imm8 >> 6) & finite_negative) | \ - ((imm8 >> 7) & snan)); - return result; -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_FLOAT16_H) */ -/* :: End simde/simde-f16.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_VECTOR_SUBSCRIPT) - #define SIMDE_ARM_SVE_DECLARE_VECTOR(Element_Type, Name, Vector_Size) Element_Type Name SIMDE_VECTOR(Vector_Size) -#else - #define SIMDE_ARM_SVE_DECLARE_VECTOR(Element_Type, Name, Vector_Size) Element_Type Name[(Vector_Size) / sizeof(Element_Type)] -#endif - -#if defined(SIMDE_ARM_SVE_NATIVE) - typedef svbool_t simde_svbool_t; - typedef svint8_t simde_svint8_t; - typedef svint16_t simde_svint16_t; - typedef svint32_t simde_svint32_t; - typedef svint64_t simde_svint64_t; - typedef svuint8_t simde_svuint8_t; - typedef svuint16_t simde_svuint16_t; - typedef svuint32_t simde_svuint32_t; - typedef svuint64_t simde_svuint64_t; - #if defined(__ARM_FEATURE_SVE_BF16) - typedef svbfloat16_t simde_svbfloat16_t; - #endif - typedef svfloat16_t 
simde_svfloat16_t; - typedef svfloat32_t simde_svfloat32_t; - typedef svfloat64_t simde_svfloat64_t; - typedef float32_t simde_float32_t; - typedef float64_t simde_float64_t; -#else - #if SIMDE_NATURAL_VECTOR_SIZE > 0 - #define SIMDE_ARM_SVE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #else - #define SIMDE_ARM_SVE_VECTOR_SIZE (128) - #endif - - typedef simde_float32 simde_float32_t; - typedef simde_float64 simde_float64_t; - - typedef union { - SIMDE_ARM_SVE_DECLARE_VECTOR(int8_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - __m512i m512i; - #endif - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; - #endif - #if defined(SIMDE_X86_SSE2_NATIVE) - __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; - #endif - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t neon; - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec; - #endif - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t v128; - #endif - } simde_svint8_t; - - typedef union { - SIMDE_ARM_SVE_DECLARE_VECTOR(int16_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - __m512i m512i; - #endif - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; - #endif - #if defined(SIMDE_X86_SSE2_NATIVE) - __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; - #endif - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8_t neon; - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec; - #endif - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t v128; - #endif - } simde_svint16_t; - - typedef union { - SIMDE_ARM_SVE_DECLARE_VECTOR(int32_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - __m512i m512i; - #endif - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; - #endif - #if defined(SIMDE_X86_SSE2_NATIVE) - __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; - #endif - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t neon; - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec; - #endif - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t v128; - #endif - } simde_svint32_t; - - typedef union { - SIMDE_ARM_SVE_DECLARE_VECTOR(int64_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - __m512i m512i; - #endif - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; - #endif - #if defined(SIMDE_X86_SSE2_NATIVE) - __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; - #endif - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t neon; - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed long long int) altivec; - #endif - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t v128; - #endif - } simde_svint64_t; - - typedef union { - SIMDE_ARM_SVE_DECLARE_VECTOR(uint8_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - - #if 
defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - __m512i m512i; - #endif - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; - #endif - #if defined(SIMDE_X86_SSE2_NATIVE) - __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; - #endif - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t neon; - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec; - #endif - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t v128; - #endif - } simde_svuint8_t; - - typedef union { - SIMDE_ARM_SVE_DECLARE_VECTOR(uint16_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - __m512i m512i; - #endif - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; - #endif - #if defined(SIMDE_X86_SSE2_NATIVE) - __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; - #endif - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t neon; - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec; - #endif - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t v128; - #endif - } simde_svuint16_t; - - typedef union { - SIMDE_ARM_SVE_DECLARE_VECTOR(uint32_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - __m512i m512i; - #endif - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; - #endif - #if defined(SIMDE_X86_SSE2_NATIVE) - __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; - #endif - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t neon; - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec; - #endif - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t v128; - #endif - } simde_svuint32_t; - - typedef union { - SIMDE_ARM_SVE_DECLARE_VECTOR(uint64_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - __m512i m512i; - #endif - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; - #endif - #if defined(SIMDE_X86_SSE2_NATIVE) - __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; - #endif - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x2_t neon; - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long int) altivec; - #endif - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t v128; - #endif - } simde_svuint64_t; - - typedef union { - SIMDE_ARM_SVE_DECLARE_VECTOR(uint16_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - __m512i m512i; - #endif - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; - #endif - #if defined(SIMDE_X86_SSE2_NATIVE) - __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; - #endif - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - float16x8_t neon; - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec; - #endif - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t v128; - #endif - } simde_svfloat16_t; - - typedef union { - SIMDE_ARM_SVE_DECLARE_VECTOR(uint16_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - __m512i m512i; - #endif - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; - #endif - #if defined(SIMDE_X86_SSE2_NATIVE) - __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec; - #endif - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t v128; - #endif - } simde_svbfloat16_t; - - typedef union { - SIMDE_ARM_SVE_DECLARE_VECTOR(simde_float32, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - __m512 m512; - #endif - #if defined(SIMDE_X86_AVX_NATIVE) - __m256 m256[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256)]; - #endif - #if defined(SIMDE_X86_SSE_NATIVE) - __m128 m128[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128)]; - #endif - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t neon; - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(float) altivec; - #endif - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t v128; - #endif - } simde_svfloat32_t; - - typedef union { - SIMDE_ARM_SVE_DECLARE_VECTOR(simde_float64, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - __m512d m512d; - #endif - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256d m256d[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256d)]; - #endif - #if defined(SIMDE_X86_SSE2_NATIVE) - __m128d m128d[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128d)]; - #endif - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t neon; - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(double) altivec; - #endif - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t v128; - #endif - } simde_svfloat64_t; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - typedef struct { - __mmask64 value; - int type; - } simde_svbool_t; - - #if defined(__BMI2__) - static const uint64_t simde_arm_sve_mask_bp_lo_ = UINT64_C(0x5555555555555555); - static const uint64_t simde_arm_sve_mask_bp_hi_ = UINT64_C(0xaaaaaaaaaaaaaaaa); - - SIMDE_FUNCTION_ATTRIBUTES - __mmask64 - simde_arm_sve_mmask32_to_mmask64(__mmask32 m) { - return HEDLEY_STATIC_CAST(__mmask64, - _pdep_u64(HEDLEY_STATIC_CAST(uint64_t, m), simde_arm_sve_mask_bp_lo_) | - _pdep_u64(HEDLEY_STATIC_CAST(uint64_t, m), simde_arm_sve_mask_bp_hi_)); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask32 - simde_arm_sve_mmask16_to_mmask32(__mmask16 m) { - return HEDLEY_STATIC_CAST(__mmask32, - _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) | - _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask16 - simde_arm_sve_mmask8_to_mmask16(__mmask8 m) { - return HEDLEY_STATIC_CAST(__mmask16, - _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, 
simde_arm_sve_mask_bp_lo_)) | - _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask8 - simde_arm_sve_mmask4_to_mmask8(__mmask8 m) { - return HEDLEY_STATIC_CAST(__mmask8, - _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) | - _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask32 - simde_arm_sve_mmask64_to_mmask32(__mmask64 m) { - return HEDLEY_STATIC_CAST(__mmask32, - _pext_u64(HEDLEY_STATIC_CAST(uint64_t, m), HEDLEY_STATIC_CAST(uint64_t, simde_arm_sve_mask_bp_lo_)) & - _pext_u64(HEDLEY_STATIC_CAST(uint64_t, m), HEDLEY_STATIC_CAST(uint64_t, simde_arm_sve_mask_bp_hi_))); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask16 - simde_arm_sve_mmask32_to_mmask16(__mmask32 m) { - return HEDLEY_STATIC_CAST(__mmask16, - _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) & - _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask8 - simde_arm_sve_mmask16_to_mmask8(__mmask16 m) { - return HEDLEY_STATIC_CAST(__mmask8, - _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) & - _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask8 - simde_arm_sve_mmask8_to_mmask4(__mmask8 m) { - return HEDLEY_STATIC_CAST(__mmask8, - _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) & - _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); - } - #else - SIMDE_FUNCTION_ATTRIBUTES - __mmask64 - simde_arm_sve_mmask32_to_mmask64(__mmask32 m) { - uint64_t e = HEDLEY_STATIC_CAST(uint64_t, m); - uint64_t o = HEDLEY_STATIC_CAST(uint64_t, m); - - e = (e | (e << 16)) & UINT64_C(0x0000ffff0000ffff); - e = (e | (e << 8)) & UINT64_C(0x00ff00ff00ff00ff); - e = (e | (e << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); - e = (e | (e << 2)) & UINT64_C(0x3333333333333333); - e = (e | (e << 1)) & UINT64_C(0x5555555555555555); - - o = (o | (o << 16)) & UINT64_C(0x0000ffff0000ffff); - o = (o | (o << 8)) & UINT64_C(0x00ff00ff00ff00ff); - o = (o | (o << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); - o = (o | (o << 2)) & UINT64_C(0x3333333333333333); - o = (o | (o << 1)) & UINT64_C(0x5555555555555555); - - return HEDLEY_STATIC_CAST(__mmask64, e | (o << 1)); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask32 - simde_arm_sve_mmask16_to_mmask32(__mmask16 m) { - uint32_t e = HEDLEY_STATIC_CAST(uint32_t, m); - uint32_t o = HEDLEY_STATIC_CAST(uint32_t, m); - - e = (e | (e << 8)) & UINT32_C(0x00FF00FF); - e = (e | (e << 4)) & UINT32_C(0x0F0F0F0F); - e = (e | (e << 2)) & UINT32_C(0x33333333); - e = (e | (e << 1)) & UINT32_C(0x55555555); - - o = (o | (o << 8)) & UINT32_C(0x00FF00FF); - o = (o | (o << 4)) & UINT32_C(0x0F0F0F0F); - o = (o | (o << 2)) & UINT32_C(0x33333333); - o = (o | (o << 1)) & UINT32_C(0x55555555); - - return HEDLEY_STATIC_CAST(__mmask32, e | (o << 1)); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask16 - simde_arm_sve_mmask8_to_mmask16(__mmask8 m) { - uint16_t e = HEDLEY_STATIC_CAST(uint16_t, m); - uint16_t o = HEDLEY_STATIC_CAST(uint16_t, m); - - e = (e | (e << 4)) & UINT16_C(0x0f0f); - e = (e | (e << 2)) & UINT16_C(0x3333); - e = (e | (e << 1)) & UINT16_C(0x5555); - 
- o = (o | (o << 4)) & UINT16_C(0x0f0f); - o = (o | (o << 2)) & UINT16_C(0x3333); - o = (o | (o << 1)) & UINT16_C(0x5555); - - return HEDLEY_STATIC_CAST(uint16_t, e | (o << 1)); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask8 - simde_arm_sve_mmask4_to_mmask8(__mmask8 m) { - uint8_t e = HEDLEY_STATIC_CAST(uint8_t, m); - uint8_t o = HEDLEY_STATIC_CAST(uint8_t, m); - - e = (e | (e << 2)) & UINT8_C(0x33); - e = (e | (e << 1)) & UINT8_C(0x55); - - o = (o | (o << 2)) & UINT8_C(0x33); - o = (o | (o << 1)) & UINT8_C(0x55); - - return HEDLEY_STATIC_CAST(uint8_t, e | (o << 1)); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask32 - simde_arm_sve_mmask64_to_mmask32(__mmask64 m) { - uint64_t l = (HEDLEY_STATIC_CAST(uint64_t, m) ) & UINT64_C(0x5555555555555555); - l = (l | (l >> 1)) & UINT64_C(0x3333333333333333); - l = (l | (l >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f); - l = (l | (l >> 4)) & UINT64_C(0x00ff00ff00ff00ff); - l = (l | (l >> 8)) & UINT64_C(0x0000ffff0000ffff); - - uint64_t h = (HEDLEY_STATIC_CAST(uint64_t, m) >> 1) & UINT64_C(0x5555555555555555); - h = (h | (h >> 1)) & UINT64_C(0x3333333333333333); - h = (h | (h >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f); - h = (h | (h >> 4)) & UINT64_C(0x00ff00ff00ff00ff); - h = (h | (h >> 8)) & UINT64_C(0x0000ffff0000ffff); - - return HEDLEY_STATIC_CAST(uint32_t, l & h); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask16 - simde_arm_sve_mmask32_to_mmask16(__mmask32 m) { - uint32_t l = (HEDLEY_STATIC_CAST(uint32_t, m) ) & UINT32_C(0x55555555); - l = (l | (l >> 1)) & UINT32_C(0x33333333); - l = (l | (l >> 2)) & UINT32_C(0x0f0f0f0f); - l = (l | (l >> 4)) & UINT32_C(0x00ff00ff); - l = (l | (l >> 8)) & UINT32_C(0x0000ffff); - - uint32_t h = (HEDLEY_STATIC_CAST(uint32_t, m) >> 1) & UINT32_C(0x55555555); - h = (h | (h >> 1)) & UINT32_C(0x33333333); - h = (h | (h >> 2)) & UINT32_C(0x0f0f0f0f); - h = (h | (h >> 4)) & UINT32_C(0x00ff00ff); - h = (h | (h >> 8)) & UINT32_C(0x0000ffff); - - return HEDLEY_STATIC_CAST(uint16_t, l & h); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask8 - simde_arm_sve_mmask16_to_mmask8(__mmask16 m) { - uint16_t l = (HEDLEY_STATIC_CAST(uint16_t, m) ) & UINT16_C(0x5555); - l = (l | (l >> 1)) & UINT16_C(0x3333); - l = (l | (l >> 2)) & UINT16_C(0x0f0f); - l = (l | (l >> 4)) & UINT16_C(0x00ff); - - uint16_t h = (HEDLEY_STATIC_CAST(uint16_t, m) >> 1) & UINT16_C(0x5555); - h = (h | (h >> 1)) & UINT16_C(0x3333); - h = (h | (h >> 2)) & UINT16_C(0x0f0f); - h = (h | (h >> 4)) & UINT16_C(0x00ff); - - return HEDLEY_STATIC_CAST(uint8_t, l & h); - } - - SIMDE_FUNCTION_ATTRIBUTES - __mmask8 - simde_arm_sve_mmask8_to_mmask4(__mmask8 m) { - uint8_t l = (HEDLEY_STATIC_CAST(uint8_t, m) ) & UINT8_C(0x55); - l = (l | (l >> 1)) & UINT8_C(0x33); - l = (l | (l >> 2)) & UINT8_C(0x0f); - l = (l | (l >> 4)) & UINT8_C(0xff); - - uint8_t h = (HEDLEY_STATIC_CAST(uint8_t, m) >> 1) & UINT8_C(0x55); - h = (h | (h >> 1)) & UINT8_C(0x33); - h = (h | (h >> 2)) & UINT8_C(0x0f); - h = (h | (h >> 4)) & UINT8_C(0xff); - - return HEDLEY_STATIC_CAST(uint8_t, l & h); - } - #endif - - typedef enum { - SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64, - SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32, - SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16, - SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8, - #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 - SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4, - #endif - } simde_svbool_mmask_type; - - HEDLEY_CONST HEDLEY_ALWAYS_INLINE - simde_svbool_t - simde_svbool_from_mmask64(__mmask64 mi) { - simde_svbool_t b; - - b.value = HEDLEY_STATIC_CAST(__mmask64, mi); - b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64; - - return b; - } - - 
SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST - simde_svbool_t - simde_svbool_from_mmask32(__mmask32 mi) { - simde_svbool_t b; - - b.value = HEDLEY_STATIC_CAST(__mmask64, mi); - b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32; - - return b; - } - - SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST - simde_svbool_t - simde_svbool_from_mmask16(__mmask16 mi) { - simde_svbool_t b; - - b.value = HEDLEY_STATIC_CAST(__mmask64, mi); - b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16; - - return b; - } - - SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST - simde_svbool_t - simde_svbool_from_mmask8(__mmask8 mi) { - simde_svbool_t b; - - b.value = HEDLEY_STATIC_CAST(__mmask64, mi); - b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8; - - return b; - } - - #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 - SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST - simde_svbool_t - simde_svbool_from_mmask4(__mmask8 mi) { - simde_svbool_t b; - - b.value = HEDLEY_STATIC_CAST(__mmask64, mi); - b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4; - - return b; - } - - SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST - __mmask8 - simde_svbool_to_mmask4(simde_svbool_t b) { - __mmask64 tmp = b.value; - - switch (b.type) { - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask64_to_mmask32(HEDLEY_STATIC_CAST(__mmask64, tmp))); - HEDLEY_FALL_THROUGH; - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask32_to_mmask16(HEDLEY_STATIC_CAST(__mmask32, tmp))); - HEDLEY_FALL_THROUGH; - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask16_to_mmask8(HEDLEY_STATIC_CAST(__mmask16, tmp))); - HEDLEY_FALL_THROUGH; - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask8_to_mmask4(HEDLEY_STATIC_CAST(__mmask8, tmp))); - } - - return HEDLEY_STATIC_CAST(__mmask8, tmp); - } - #endif - - SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST - __mmask8 - simde_svbool_to_mmask8(simde_svbool_t b) { - __mmask64 tmp = b.value; - - switch (b.type) { - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask64_to_mmask32(HEDLEY_STATIC_CAST(__mmask64, tmp))); - HEDLEY_FALL_THROUGH; - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask32_to_mmask16(HEDLEY_STATIC_CAST(__mmask32, tmp))); - HEDLEY_FALL_THROUGH; - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask16_to_mmask8(HEDLEY_STATIC_CAST(__mmask16, tmp))); - HEDLEY_FALL_THROUGH; - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: - break; - - #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask4_to_mmask8(HEDLEY_STATIC_CAST(__mmask8, tmp))); - #endif - } - - return HEDLEY_STATIC_CAST(__mmask8, tmp); - } - - SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST - __mmask16 - simde_svbool_to_mmask16(simde_svbool_t b) { - __mmask64 tmp = b.value; - - switch (b.type) { - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask64_to_mmask32(HEDLEY_STATIC_CAST(__mmask64, tmp))); - HEDLEY_FALL_THROUGH; - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask32_to_mmask16(HEDLEY_STATIC_CAST(__mmask32, tmp))); - HEDLEY_FALL_THROUGH; - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: - break; - - #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask4_to_mmask8(HEDLEY_STATIC_CAST(__mmask8, 
tmp))); - HEDLEY_FALL_THROUGH; - #endif - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask8_to_mmask16(HEDLEY_STATIC_CAST(__mmask8, tmp))); - } - - return HEDLEY_STATIC_CAST(__mmask16, tmp); - } - - SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST - __mmask32 - simde_svbool_to_mmask32(simde_svbool_t b) { - __mmask64 tmp = b.value; - - switch (b.type) { - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask64_to_mmask32(HEDLEY_STATIC_CAST(__mmask64, tmp))); - HEDLEY_FALL_THROUGH; - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: - break; - - #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask4_to_mmask8(HEDLEY_STATIC_CAST(__mmask8, tmp))); - HEDLEY_FALL_THROUGH; - #endif - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask8_to_mmask16(HEDLEY_STATIC_CAST(__mmask8, tmp))); - HEDLEY_FALL_THROUGH; - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask16_to_mmask32(HEDLEY_STATIC_CAST(__mmask16, tmp))); - } - - return HEDLEY_STATIC_CAST(__mmask32, tmp); - } - - SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST - __mmask64 - simde_svbool_to_mmask64(simde_svbool_t b) { - __mmask64 tmp = b.value; - - switch (b.type) { - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: - break; - - #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask4_to_mmask8(HEDLEY_STATIC_CAST(__mmask8, tmp))); - HEDLEY_FALL_THROUGH; - #endif - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask8_to_mmask16(HEDLEY_STATIC_CAST(__mmask8, tmp))); - HEDLEY_FALL_THROUGH; - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask16_to_mmask32(HEDLEY_STATIC_CAST(__mmask16, tmp))); - HEDLEY_FALL_THROUGH; - case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: - tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask32_to_mmask64(HEDLEY_STATIC_CAST(__mmask32, tmp))); - } - - return HEDLEY_STATIC_CAST(__mmask64, tmp); - } - - /* TODO: we're going to need need svbool_to/from_svint* functions - * for when we can't implement a function using AVX-512. 
*/ - #else - typedef union { - SIMDE_ARM_SVE_DECLARE_VECTOR( int8_t, values_i8, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - SIMDE_ARM_SVE_DECLARE_VECTOR( int16_t, values_i16, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - SIMDE_ARM_SVE_DECLARE_VECTOR( int32_t, values_i32, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - SIMDE_ARM_SVE_DECLARE_VECTOR( int64_t, values_i64, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - SIMDE_ARM_SVE_DECLARE_VECTOR( uint8_t, values_u8, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - SIMDE_ARM_SVE_DECLARE_VECTOR(uint16_t, values_u16, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - SIMDE_ARM_SVE_DECLARE_VECTOR(uint32_t, values_u32, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - SIMDE_ARM_SVE_DECLARE_VECTOR(uint64_t, values_u64, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - __m512i m512i; - #endif - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; - #endif - #if defined(SIMDE_X86_SSE2_NATIVE) - __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; - #endif - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t neon_i8; - int16x8_t neon_i16; - int32x4_t neon_i32; - int64x2_t neon_i64; - uint8x16_t neon_u8; - uint16x8_t neon_u16; - uint32x4_t neon_u32; - uint64x2_t neon_u64; - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) altivec_b8; - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL short) altivec_b16; - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) altivec_b32; - #endif - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL long long) altivec_b64; - #endif - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t v128; - #endif - } simde_svbool_t; - - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svint8, simde_svint8_t, simde_svbool_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_from_svint8, simde_svbool_t, simde_svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svint16, simde_svint16_t, simde_svbool_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svint16, simde_svbool_t, simde_svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svint32, simde_svint32_t, simde_svbool_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svint32, simde_svbool_t, simde_svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svint64, simde_svint64_t, simde_svbool_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svint64, simde_svbool_t, simde_svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svuint8, simde_svuint8_t, simde_svbool_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svuint8, simde_svbool_t, simde_svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svuint16, simde_svuint16_t, simde_svbool_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svuint16, simde_svbool_t, simde_svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svuint32, simde_svuint32_t, simde_svbool_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svuint32, simde_svbool_t, simde_svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svuint64, simde_svuint64_t, simde_svbool_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svuint64, simde_svbool_t, simde_svuint64_t) - #endif - - #if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - typedef simde_svbool_t svbool_t; - typedef simde_svint8_t svint8_t; - typedef 
simde_svint16_t svint16_t; - typedef simde_svint32_t svint32_t; - typedef simde_svint64_t svint64_t; - typedef simde_svuint8_t svuint8_t; - typedef simde_svuint16_t svuint16_t; - typedef simde_svuint32_t svuint32_t; - typedef simde_svuint64_t svuint64_t; - typedef simde_svfloat16_t svfloat16_t; - typedef simde_svbfloat16_t svbfloat16_t; - typedef simde_svfloat32_t svfloat32_t; - typedef simde_svfloat64_t svfloat64_t; - #endif -#endif - -#if !defined(SIMDE_ARM_SVE_DEFAULT_UNDEFINED_SUFFIX) - #define SIMDE_ARM_SVE_DEFAULT_UNDEFINED_SUFFIX z -#endif -#define SIMDE_ARM_SVE_UNDEFINED_SYMBOL(name) HEDLEY_CONCAT3(name, _, SIMDE_ARM_SVE_DEFAULT_UNDEFINED_SUFFIX) - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -/* These are going to be used pretty much everywhere since they are - * used to create the loops SVE requires. Since we want to support - * only including the files you need instead of just using sve.h, - * it's helpful to pull these in here. While this file is called - * arm/sve/types.h, it might be better to think of it more as - * arm/sve/common.h. */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/cnt.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_CNT_H) -#define SIMDE_ARM_SVE_CNT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_svcntb(void) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcntb(); - #else - return sizeof(simde_svint8_t) / sizeof(int8_t); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcntb - #define svcntb() simde_svcntb() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_svcnth(void) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcnth(); - #else - return sizeof(simde_svint16_t) / sizeof(int16_t); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcnth - #define svcnth() simde_svcnth() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_svcntw(void) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcntw(); - #else - return sizeof(simde_svint32_t) / sizeof(int32_t); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcntw - #define svcntw() simde_svcntw() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_svcntd(void) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcntd(); - #else - return sizeof(simde_svint64_t) / sizeof(int64_t); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcntd - #define svcntd() simde_svcntd() -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_SVE_CNT_H */ -/* :: End simde/arm/sve/cnt.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/ld1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -/* Note: we don't have vector implementations for most of these because - * we can't just load everything and mask out the uninteresting bits; - * that might cause a fault, for example if the end of the buffer buts - * up against a protected page. - * - * One thing we might be able to do would be to check if the predicate - * is all ones and, if so, use an unpredicated load instruction. This - * would probably we worthwhile for smaller types, though perhaps not - * for larger types since it would mean branching for every load plus - * the overhead of checking whether all bits are 1. 
*/ - -#if !defined(SIMDE_ARM_SVE_LD1_H) -#define SIMDE_ARM_SVE_LD1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svld1_s8(simde_svbool_t pg, const int8_t * base) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svld1_s8(pg, base); - #else - simde_svint8_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_maskz_loadu_epi8(simde_svbool_to_mmask64(pg), base); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_maskz_loadu_epi8(simde_svbool_to_mmask32(pg), base); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { - r.values[i] = pg.values_i8[i] ? base[i] : INT8_C(0); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svld1_s8 - #define svld1_s8(pg, base) simde_svld1_s8((pg), (base)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svld1_s16(simde_svbool_t pg, const int16_t * base) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svld1_s16(pg, base); - #else - simde_svint16_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_maskz_loadu_epi16(simde_svbool_to_mmask32(pg), base); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_maskz_loadu_epi16(simde_svbool_to_mmask16(pg), base); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { - r.values[i] = pg.values_i16[i] ? base[i] : INT16_C(0); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svld1_s16 - #define svld1_s16(pg, base) simde_svld1_s16((pg), (base)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svld1_s32(simde_svbool_t pg, const int32_t * base) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svld1_s32(pg, base); - #else - simde_svint32_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_maskz_loadu_epi32(simde_svbool_to_mmask16(pg), base); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_maskz_loadu_epi32(simde_svbool_to_mmask8(pg), base); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { - r.values[i] = pg.values_i32[i] ? 
base[i] : INT32_C(0); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svld1_s32 - #define svld1_s32(pg, base) simde_svld1_s32((pg), (base)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svld1_s64(simde_svbool_t pg, const int64_t * base) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svld1_s64(pg, base); - #else - simde_svint64_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_maskz_loadu_epi64(simde_svbool_to_mmask8(pg), base); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_maskz_loadu_epi64(simde_svbool_to_mmask4(pg), base); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { - r.values[i] = pg.values_i64[i] ? base[i] : INT64_C(0); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svld1_s64 - #define svld1_s64(pg, base) simde_svld1_s64((pg), (base)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svld1_u8(simde_svbool_t pg, const uint8_t * base) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svld1_u8(pg, base); - #else - simde_svuint8_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_maskz_loadu_epi8(simde_svbool_to_mmask64(pg), base); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_maskz_loadu_epi8(simde_svbool_to_mmask32(pg), base); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { - r.values[i] = pg.values_i8[i] ? base[i] : UINT8_C(0); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svld1_u8 - #define svld1_u8(pg, base) simde_svld1_u8((pg), (base)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svld1_u16(simde_svbool_t pg, const uint16_t * base) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svld1_u16(pg, base); - #else - simde_svuint16_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_maskz_loadu_epi16(simde_svbool_to_mmask32(pg), base); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_maskz_loadu_epi16(simde_svbool_to_mmask16(pg), base); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { - r.values[i] = pg.values_i16[i] ? 
base[i] : UINT16_C(0); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svld1_u16 - #define svld1_u16(pg, base) simde_svld1_u16((pg), (base)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svld1_u32(simde_svbool_t pg, const uint32_t * base) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svld1_u32(pg, base); - #else - simde_svuint32_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_maskz_loadu_epi32(simde_svbool_to_mmask16(pg), base); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_maskz_loadu_epi32(simde_svbool_to_mmask8(pg), base); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { - r.values[i] = pg.values_i32[i] ? base[i] : UINT32_C(0); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svld1_u32 - #define svld1_u32(pg, base) simde_svld1_u32((pg), (base)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svld1_u64(simde_svbool_t pg, const uint64_t * base) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svld1_u64(pg, base); - #else - simde_svuint64_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_maskz_loadu_epi64(simde_svbool_to_mmask8(pg), base); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_maskz_loadu_epi64(simde_svbool_to_mmask4(pg), base); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { - r.values[i] = pg.values_i64[i] ? base[i] : UINT64_C(0); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svld1_u64 - #define svld1_u64(pg, base) simde_svld1_u64((pg), (base)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svld1_f32(simde_svbool_t pg, const simde_float32 * base) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svld1_f32(pg, base); - #else - simde_svfloat32_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512 = _mm512_maskz_loadu_ps(simde_svbool_to_mmask16(pg), base); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256[0] = _mm256_maskz_loadu_ps(simde_svbool_to_mmask8(pg), base); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { - r.values[i] = pg.values_i32[i] ? 
base[i] : SIMDE_FLOAT32_C(0.0); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svld1_f32 - #define svld1_f32(pg, base) simde_svld1_f32((pg), (base)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svld1_f64(simde_svbool_t pg, const simde_float64 * base) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svld1_f64(pg, base); - #else - simde_svfloat64_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512d = _mm512_maskz_loadu_pd(simde_svbool_to_mmask8(pg), base); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256d[0] = _mm256_maskz_loadu_pd(simde_svbool_to_mmask4(pg), base); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { - r.values[i] = pg.values_i64[i] ? base[i] : SIMDE_FLOAT64_C(0.0); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svld1_f64 - #define svld1_f64(pg, base) simde_svld1_f64((pg), (base)) -#endif - -#if defined(__cplusplus) - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svld1(simde_svbool_t pg, const int8_t * base) { return simde_svld1_s8 (pg, base); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svld1(simde_svbool_t pg, const int16_t * base) { return simde_svld1_s16(pg, base); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svld1(simde_svbool_t pg, const int32_t * base) { return simde_svld1_s32(pg, base); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svld1(simde_svbool_t pg, const int64_t * base) { return simde_svld1_s64(pg, base); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svld1(simde_svbool_t pg, const uint8_t * base) { return simde_svld1_u8 (pg, base); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svld1(simde_svbool_t pg, const uint16_t * base) { return simde_svld1_u16(pg, base); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svld1(simde_svbool_t pg, const uint32_t * base) { return simde_svld1_u32(pg, base); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svld1(simde_svbool_t pg, const uint64_t * base) { return simde_svld1_u64(pg, base); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svld1(simde_svbool_t pg, const simde_float32 * base) { return simde_svld1_f32(pg, base); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svld1(simde_svbool_t pg, const simde_float64 * base) { return simde_svld1_f64(pg, base); } -#elif defined(SIMDE_GENERIC_) - #define simde_svld1(pg, base) \ - (SIMDE_GENERIC_((base), \ - const int8_t *: simde_svld1_s8 , \ - const int16_t *: simde_svld1_s16, \ - const int32_t *: simde_svld1_s32, \ - const int64_t *: simde_svld1_s64, \ - const uint8_t *: simde_svld1_u8 , \ - const uint16_t *: simde_svld1_u16, \ - const uint32_t *: simde_svld1_u32, \ - const uint64_t *: simde_svld1_u64, \ - const simde_float32 *: simde_svld1_f32, \ - const simde_float64 *: simde_svld1_f64)(pg, base)) -#endif -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef svld1 - #define svld1(pg, base) simde_svld1((pg), (base)) -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_SVE_LD1_H */ -/* :: End simde/arm/sve/ld1.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/ptest.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of 
charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_PTEST_H) -#define SIMDE_ARM_SVE_PTEST_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -SIMDE_FUNCTION_ATTRIBUTES -simde_bool -simde_svptest_first(simde_svbool_t pg, simde_svbool_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svptest_first(pg, op); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_LIKELY(pg.value & 1)) - return op.value & 1; - - if (pg.value == 0 || op.value == 0) - return 0; - - #if defined(_MSC_VER) - unsigned long r = 0; - _BitScanForward64(&r, HEDLEY_STATIC_CAST(uint64_t, pg.value)); - return (op.value >> r) & 1; - #else - return (op.value >> __builtin_ctzll(HEDLEY_STATIC_CAST(unsigned long long, pg.value))) & 1; - #endif - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { - if (pg.values_i8[i]) { - return !!op.values_i8[i]; - } - } - - return 0; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svptest_first - #define svptest_first(pg, op) simde_svptest_first(pg, op) -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_SVE_PTEST_H */ -/* :: End simde/arm/sve/ptest.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/ptrue.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_PTRUE_H) -#define SIMDE_ARM_SVE_PTRUE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svptrue_b8(void) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svptrue_b8(); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - simde_svbool_t r; - - #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 - r = simde_svbool_from_mmask64(HEDLEY_STATIC_CAST(__mmask64, ~UINT64_C(0))); - #else - r = simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0))); - #endif - - return r; - #else - simde_svint8_t r; - - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { - r.values[i] = ~INT8_C(0); - } - - return simde_svbool_from_svint8(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svptrue_b8 - #define svptrue_b8() simde_svptrue_b8() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svptrue_b16(void) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svptrue_b16(); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - simde_svbool_t r; - - #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 - r = simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0))); - #else - r = simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0))); - #endif - - return r; - #else - simde_svint16_t r; - - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { - r.values[i] = ~INT16_C(0); - } - - return simde_svbool_from_svint16(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svptrue_b16 - #define svptrue_b16() simde_svptrue_b16() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svptrue_b32(void) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svptrue_b32(); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - simde_svbool_t r; - - #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 - r = simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0))); - #else - r = simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0))); - #endif - - return r; - #else - simde_svint32_t r; - - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { - r.values[i] = ~INT32_C(0); - } - - return simde_svbool_from_svint32(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svptrue_b32 - #define svptrue_b32() simde_svptrue_b32() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svptrue_b64(void) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svptrue_b64(); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - simde_svbool_t r; - - #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 - r = simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0))); - #else - r = simde_svbool_from_mmask4(HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0))); - #endif - - return r; - #else - simde_svint64_t r; - - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, 
simde_svcntd()) ; i++) { - r.values[i] = ~INT64_C(0); - } - - return simde_svbool_from_svint64(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svptrue_b64 - #define svptrue_b64() simde_svptrue_b64() -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_SVE_PTRUE_H */ -/* :: End simde/arm/sve/ptrue.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/st1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_ST1_H) -#define SIMDE_ARM_SVE_ST1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_svst1_s8(simde_svbool_t pg, int8_t * base, simde_svint8_t data) { - #if defined(SIMDE_ARM_SVE_NATIVE) - svst1_s8(pg, base, data); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm512_mask_storeu_epi8(base, simde_svbool_to_mmask64(pg), data.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm256_mask_storeu_epi8(base, simde_svbool_to_mmask32(pg), data.m256i[0]); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { - if (pg.values_i8[i]) { - base[i] = data.values[i]; - } - } - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svst1_s8 - #define svst1_s8(pg, base, data) simde_svst1_s8((pg), (base), (data)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_svst1_s16(simde_svbool_t pg, int16_t * base, simde_svint16_t data) { - #if defined(SIMDE_ARM_SVE_NATIVE) - svst1_s16(pg, base, data); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm512_mask_storeu_epi16(base, simde_svbool_to_mmask32(pg), data.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm256_mask_storeu_epi16(base, simde_svbool_to_mmask16(pg), data.m256i[0]); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; 
i++) { - if (pg.values_i16[i]) { - base[i] = data.values[i]; - } - } - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svst1_s16 - #define svst1_s16(pg, base, data) simde_svst1_s16((pg), (base), (data)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_svst1_s32(simde_svbool_t pg, int32_t * base, simde_svint32_t data) { - #if defined(SIMDE_ARM_SVE_NATIVE) - svst1_s32(pg, base, data); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm512_mask_storeu_epi32(base, simde_svbool_to_mmask16(pg), data.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm256_mask_storeu_epi32(base, simde_svbool_to_mmask8(pg), data.m256i[0]); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { - if (pg.values_i32[i]) { - base[i] = data.values[i]; - } - } - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svst1_s32 - #define svst1_s32(pg, base, data) simde_svst1_s32((pg), (base), (data)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_svst1_s64(simde_svbool_t pg, int64_t * base, simde_svint64_t data) { - #if defined(SIMDE_ARM_SVE_NATIVE) - svst1_s64(pg, base, data); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm512_mask_storeu_epi64(base, simde_svbool_to_mmask8(pg), data.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm256_mask_storeu_epi64(base, simde_svbool_to_mmask4(pg), data.m256i[0]); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { - if (pg.values_i64[i]) { - base[i] = data.values[i]; - } - } - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svst1_s64 - #define svst1_s64(pg, base, data) simde_svst1_s64((pg), (base), (data)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_svst1_u8(simde_svbool_t pg, uint8_t * base, simde_svuint8_t data) { - #if defined(SIMDE_ARM_SVE_NATIVE) - svst1_u8(pg, base, data); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm512_mask_storeu_epi8(base, simde_svbool_to_mmask64(pg), data.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm256_mask_storeu_epi8(base, simde_svbool_to_mmask32(pg), data.m256i[0]); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { - if (pg.values_u8[i]) { - base[i] = data.values[i]; - } - } - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svst1_u8 - #define svst1_u8(pg, base, data) simde_svst1_u8((pg), (base), (data)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_svst1_u16(simde_svbool_t pg, uint16_t * base, simde_svuint16_t data) { - #if defined(SIMDE_ARM_SVE_NATIVE) - svst1_u16(pg, base, data); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm512_mask_storeu_epi16(base, simde_svbool_to_mmask32(pg), data.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && 
defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm256_mask_storeu_epi16(base, simde_svbool_to_mmask16(pg), data.m256i[0]); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { - if (pg.values_u16[i]) { - base[i] = data.values[i]; - } - } - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svst1_u16 - #define svst1_u16(pg, base, data) simde_svst1_u16((pg), (base), (data)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_svst1_u32(simde_svbool_t pg, uint32_t * base, simde_svuint32_t data) { - #if defined(SIMDE_ARM_SVE_NATIVE) - svst1_u32(pg, base, data); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm512_mask_storeu_epi32(base, simde_svbool_to_mmask16(pg), data.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm256_mask_storeu_epi32(base, simde_svbool_to_mmask8(pg), data.m256i[0]); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { - if (pg.values_u32[i]) { - base[i] = data.values[i]; - } - } - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svst1_u32 - #define svst1_u32(pg, base, data) simde_svst1_u32((pg), (base), (data)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_svst1_u64(simde_svbool_t pg, uint64_t * base, simde_svuint64_t data) { - #if defined(SIMDE_ARM_SVE_NATIVE) - svst1_u64(pg, base, data); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm512_mask_storeu_epi64(base, simde_svbool_to_mmask8(pg), data.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm256_mask_storeu_epi64(base, simde_svbool_to_mmask4(pg), data.m256i[0]); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { - if (pg.values_u64[i]) { - base[i] = data.values[i]; - } - } - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svst1_u64 - #define svst1_u64(pg, base, data) simde_svst1_u64((pg), (base), (data)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_svst1_f32(simde_svbool_t pg, simde_float32 * base, simde_svfloat32_t data) { - #if defined(SIMDE_ARM_SVE_NATIVE) - svst1_f32(pg, base, data); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm512_mask_storeu_ps(base, simde_svbool_to_mmask16(pg), data.m512); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm256_mask_storeu_ps(base, simde_svbool_to_mmask8(pg), data.m256[0]); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { - if (pg.values_i32[i]) { - base[i] = data.values[i]; - } - } - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svst1_f32 - #define svst1_f32(pg, base, data) simde_svst1_f32((pg), (base), (data)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_svst1_f64(simde_svbool_t pg, simde_float64 * base, simde_svfloat64_t data) { - #if defined(SIMDE_ARM_SVE_NATIVE) - svst1_f64(pg, base, data); - 
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm512_mask_storeu_pd(base, simde_svbool_to_mmask8(pg), data.m512d); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - _mm256_mask_storeu_pd(base, simde_svbool_to_mmask4(pg), data.m256d[0]); - #else - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { - if (pg.values_i64[i]) { - base[i] = data.values[i]; - } - } - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svst1_f64 - #define svst1_f64(pg, base, data) simde_svst1_f64((pg), (base), (data)) -#endif - -#if defined(__cplusplus) - SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, int8_t * base, simde_svint8_t data) { simde_svst1_s8 (pg, base, data); } - SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, int16_t * base, simde_svint16_t data) { simde_svst1_s16(pg, base, data); } - SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, int32_t * base, simde_svint32_t data) { simde_svst1_s32(pg, base, data); } - SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, int64_t * base, simde_svint64_t data) { simde_svst1_s64(pg, base, data); } - SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, uint8_t * base, simde_svuint8_t data) { simde_svst1_u8 (pg, base, data); } - SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, uint16_t * base, simde_svuint16_t data) { simde_svst1_u16(pg, base, data); } - SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, uint32_t * base, simde_svuint32_t data) { simde_svst1_u32(pg, base, data); } - SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, uint64_t * base, simde_svuint64_t data) { simde_svst1_u64(pg, base, data); } - SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, simde_float32 * base, simde_svfloat32_t data) { simde_svst1_f32(pg, base, data); } - SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, simde_float64 * base, simde_svfloat64_t data) { simde_svst1_f64(pg, base, data); } -#elif defined(SIMDE_GENERIC_) - #define simde_svst1(pg, base, data) \ - (SIMDE_GENERIC_((data), \ - simde_svint8_t: simde_svst1_s8 , \ - simde_svint16_t: simde_svst1_s16, \ - simde_svint32_t: simde_svst1_s32, \ - simde_svint64_t: simde_svst1_s64, \ - simde_svuint8_t: simde_svst1_u8 , \ - simde_svuint16_t: simde_svst1_u16, \ - simde_svuint32_t: simde_svst1_u32, \ - simde_svuint64_t: simde_svst1_u64, \ - simde_svfloat32_t: simde_svst1_f32, \ - simde_svfloat64_t: simde_svst1_f64)((pg), (base), (data))) -#endif -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef svst1 - #define svst1(pg, base, data) simde_svst1((pg), (base), (data)) -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_SVE_ST1_H */ -/* :: End simde/arm/sve/st1.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/whilelt.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to 
do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_WHILELT_H) -#define SIMDE_ARM_SVE_WHILELT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b8_s32(int32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b8_s32(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask64(HEDLEY_STATIC_CAST(__mmask64, 0)); - - int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); - __mmask64 r = ~HEDLEY_STATIC_CAST(__mmask64, 0); - if (HEDLEY_UNLIKELY(remaining < 64)) { - r >>= 64 - remaining; - } - - return simde_svbool_from_mmask64(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); - - int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); - __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); - if (HEDLEY_UNLIKELY(remaining < 32)) { - r >>= 32 - remaining; - } - - return simde_svbool_from_mmask32(r); - #else - simde_svint8_t r; - - int_fast32_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { - r.values[i] = (remaining-- > 0) ? 
~UINT8_C(0) : UINT8_C(0); - } - - return simde_svbool_from_svint8(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b8_s32 - #define svwhilelt_b8_s32(op1, op2) simde_svwhilelt_b8_s32(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b16_s32(int32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b16_s32(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); - - int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); - __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); - if (HEDLEY_UNLIKELY(remaining < 32)) { - r >>= 32 - remaining; - } - - return simde_svbool_from_mmask32(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); - - int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); - __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); - if (HEDLEY_UNLIKELY(remaining < 16)) { - r >>= 16 - remaining; - } - - return simde_svbool_from_mmask16(r); - #else - simde_svint16_t r; - - int_fast32_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { - r.values[i] = (remaining-- > 0) ? ~UINT16_C(0) : UINT16_C(0); - } - - return simde_svbool_from_svint16(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b16_s32 - #define svwhilelt_b16_s32(op1, op2) simde_svwhilelt_b16_s32(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b32_s32(int32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b32_s32(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); - - int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); - __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); - if (HEDLEY_UNLIKELY(remaining < 16)) { - r >>= 16 - remaining; - } - - return simde_svbool_from_mmask16(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); - - int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); - __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); - if (HEDLEY_UNLIKELY(remaining < 8)) { - r >>= 8 - remaining; - } - - return simde_svbool_from_mmask8(r); - #else - simde_svint32_t r; - - int_fast32_t remaining = (op1 >= op2) ? 
0 : (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { - r.values[i] = (remaining-- > 0) ? ~INT32_C(0) : INT32_C(0); - } - - return simde_svbool_from_svint32(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b32_s32 - #define svwhilelt_b32_s32(op1, op2) simde_svwhilelt_b32_s32(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b64_s32(int32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b64_s32(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); - - int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); - __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); - if (HEDLEY_UNLIKELY(remaining < 8)) { - r >>= 8 - remaining; - } - - return simde_svbool_from_mmask8(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask4(HEDLEY_STATIC_CAST(__mmask8, 0)); - - int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); - __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, 0x0f); - if (HEDLEY_UNLIKELY(remaining < 4)) { - r >>= 4 - remaining; - } - - return simde_svbool_from_mmask4(r); - #else - simde_svint64_t r; - - int_fast32_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { - r.values[i] = (remaining-- > 0) ? ~INT64_C(0) : INT64_C(0); - } - - return simde_svbool_from_svint64(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b64_s32 - #define svwhilelt_b64_s32(op1, op2) simde_svwhilelt_b64_s32(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b8_s64(int64_t op1, int64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b8_s64(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask64(HEDLEY_STATIC_CAST(__mmask64, 0)); - - int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); - __mmask64 r = ~HEDLEY_STATIC_CAST(__mmask64, 0); - if (HEDLEY_UNLIKELY(remaining < 64)) { - r >>= 64 - remaining; - } - - return simde_svbool_from_mmask64(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); - - int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); - __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); - if (HEDLEY_UNLIKELY(remaining < 32)) { - r >>= 32 - remaining; - } - - return simde_svbool_from_mmask32(r); - #else - simde_svint8_t r; - - int_fast64_t remaining = (op1 >= op2) ? 
0 : (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { - r.values[i] = (remaining-- > 0) ? ~UINT8_C(0) : UINT8_C(0); - } - - return simde_svbool_from_svint8(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b8_s64 - #define svwhilelt_b8_s64(op1, op2) simde_svwhilelt_b8_s64(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b16_s64(int64_t op1, int64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b16_s64(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); - - int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); - __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); - if (HEDLEY_UNLIKELY(remaining < 32)) { - r >>= 32 - remaining; - } - - return simde_svbool_from_mmask32(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); - - int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); - __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); - if (HEDLEY_UNLIKELY(remaining < 16)) { - r >>= 16 - remaining; - } - - return simde_svbool_from_mmask16(r); - #else - simde_svint16_t r; - - int_fast64_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { - r.values[i] = (remaining-- > 0) ? 
~UINT16_C(0) : UINT16_C(0); - } - - return simde_svbool_from_svint16(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b16_s64 - #define svwhilelt_b16_s64(op1, op2) simde_svwhilelt_b16_s64(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b32_s64(int64_t op1, int64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b32_s64(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); - - int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); - __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); - if (HEDLEY_UNLIKELY(remaining < 16)) { - r >>= 16 - remaining; - } - - return simde_svbool_from_mmask16(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); - - int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); - __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); - if (HEDLEY_UNLIKELY(remaining < 8)) { - r >>= 8 - remaining; - } - - return simde_svbool_from_mmask8(r); - #else - simde_svint64_t r; - - int_fast64_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { - r.values[i] = (remaining-- > 0) ? ~INT64_C(0) : INT64_C(0); - } - - return simde_svbool_from_svint64(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b32_s64 - #define svwhilelt_b32_s64(op1, op2) simde_svwhilelt_b32_s64(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b64_s64(int64_t op1, int64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b64_s64(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); - - int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); - __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); - if (HEDLEY_UNLIKELY(remaining < 8)) { - r >>= 8 - remaining; - } - - return simde_svbool_from_mmask8(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask4(HEDLEY_STATIC_CAST(__mmask8, 0)); - - int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); - __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, 0x0f); - if (HEDLEY_UNLIKELY(remaining < 4)) { - r >>= 4 - remaining; - } - - return simde_svbool_from_mmask4(r); - #else - simde_svint64_t r; - - int_fast64_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { - r.values[i] = (remaining-- > 0) ? 
~INT64_C(0) : INT64_C(0); - } - - return simde_svbool_from_svint64(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b64_s64 - #define svwhilelt_b64_s64(op1, op2) simde_svwhilelt_b64_s64(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b8_u32(uint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b8_u32(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask64(HEDLEY_STATIC_CAST(__mmask64, 0)); - - uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); - __mmask64 r = ~HEDLEY_STATIC_CAST(__mmask64, 0); - if (HEDLEY_UNLIKELY(remaining < 64)) { - r >>= 64 - remaining; - } - - return simde_svbool_from_mmask64(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); - - uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); - __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); - if (HEDLEY_UNLIKELY(remaining < 32)) { - r >>= 32 - remaining; - } - - return simde_svbool_from_mmask32(r); - #else - simde_svint8_t r; - - uint_fast32_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { - r.values[i] = (remaining-- > 0) ? ~UINT8_C(0) : UINT8_C(0); - } - - return simde_svbool_from_svint8(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b8_u32 - #define svwhilelt_b8_u32(op1, op2) simde_svwhilelt_b8_u32(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b16_u32(uint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b16_u32(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); - - uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); - __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); - if (HEDLEY_UNLIKELY(remaining < 32)) { - r >>= 32 - remaining; - } - - return simde_svbool_from_mmask32(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); - - uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); - __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); - if (HEDLEY_UNLIKELY(remaining < 16)) { - r >>= 16 - remaining; - } - - return simde_svbool_from_mmask16(r); - #else - simde_svint16_t r; - - uint_fast32_t remaining = (op1 >= op2) ? 
0 : (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { - r.values[i] = (remaining-- > 0) ? ~UINT16_C(0) : UINT16_C(0); - } - - return simde_svbool_from_svint16(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b16_u32 - #define svwhilelt_b16_u32(op1, op2) simde_svwhilelt_b16_u32(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b32_u32(uint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b32_u32(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); - - uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); - __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); - if (HEDLEY_UNLIKELY(remaining < 16)) { - r >>= 16 - remaining; - } - - return simde_svbool_from_mmask16(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); - - uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); - __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); - if (HEDLEY_UNLIKELY(remaining < 8)) { - r >>= 8 - remaining; - } - - return simde_svbool_from_mmask8(r); - #else - simde_svuint32_t r; - - uint_fast32_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { - r.values[i] = (remaining-- > 0) ? 
~UINT32_C(0) : UINT32_C(0); - } - - return simde_svbool_from_svuint32(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b32_u32 - #define svwhilelt_b32_u32(op1, op2) simde_svwhilelt_b32_u32(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b64_u32(uint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b64_u32(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); - - uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); - __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); - if (HEDLEY_UNLIKELY(remaining < 8)) { - r >>= 8 - remaining; - } - - return simde_svbool_from_mmask8(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask4(HEDLEY_STATIC_CAST(__mmask8, 0)); - - uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); - __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, 0x0f); - if (HEDLEY_UNLIKELY(remaining < 4)) { - r >>= 4 - remaining; - } - - return simde_svbool_from_mmask4(r); - #else - simde_svint64_t r; - - uint_fast32_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { - r.values[i] = (remaining-- > 0) ? ~INT64_C(0) : INT64_C(0); - } - - return simde_svbool_from_svint64(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b64_u32 - #define svwhilelt_b64_u32(op1, op2) simde_svwhilelt_b64_u32(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b8_u64(uint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b8_u64(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask64(HEDLEY_STATIC_CAST(__mmask64, 0)); - - uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); - __mmask64 r = ~HEDLEY_STATIC_CAST(__mmask64, 0); - if (HEDLEY_UNLIKELY(remaining < 64)) { - r >>= 64 - remaining; - } - - return simde_svbool_from_mmask64(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); - - uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); - __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT64_C(0)); - if (HEDLEY_UNLIKELY(remaining < 32)) { - r >>= 32 - remaining; - } - - return simde_svbool_from_mmask32(r); - #else - simde_svint8_t r; - - uint_fast64_t remaining = (op1 >= op2) ? 
0 : (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { - r.values[i] = (remaining-- > 0) ? ~UINT8_C(0) : UINT8_C(0); - } - - return simde_svbool_from_svint8(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b8_u64 - #define svwhilelt_b8_u64(op1, op2) simde_svwhilelt_b8_u64(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b16_u64(uint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b16_u64(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); - - uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); - __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); - if (HEDLEY_UNLIKELY(remaining < 32)) { - r >>= 32 - remaining; - } - - return simde_svbool_from_mmask32(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); - - uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); - __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); - if (HEDLEY_UNLIKELY(remaining < 16)) { - r >>= 16 - remaining; - } - - return simde_svbool_from_mmask16(r); - #else - simde_svint16_t r; - - uint_fast64_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { - r.values[i] = (remaining-- > 0) ? 
~UINT16_C(0) : UINT16_C(0); - } - - return simde_svbool_from_svint16(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b16_u64 - #define svwhilelt_b16_u64(op1, op2) simde_svwhilelt_b16_u64(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b32_u64(uint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b32_u64(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); - - uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); - __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); - if (HEDLEY_UNLIKELY(remaining < 16)) { - r >>= 16 - remaining; - } - - return simde_svbool_from_mmask16(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); - - uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); - __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); - if (HEDLEY_UNLIKELY(remaining < 8)) { - r >>= 8 - remaining; - } - - return simde_svbool_from_mmask8(r); - #else - simde_svuint64_t r; - - uint_fast64_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { - r.values[i] = (remaining-- > 0) ? ~UINT64_C(0) : UINT64_C(0); - } - - return simde_svbool_from_svuint64(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b32_u64 - #define svwhilelt_b32_u64(op1, op2) simde_svwhilelt_b32_u64(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svwhilelt_b64_u64(uint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svwhilelt_b64_u64(op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); - - uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); - __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); - if (HEDLEY_UNLIKELY(remaining < 8)) { - r >>= 8 - remaining; - } - - return simde_svbool_from_mmask8(r); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - if (HEDLEY_UNLIKELY(op1 >= op2)) - return simde_svbool_from_mmask4(HEDLEY_STATIC_CAST(__mmask8, 0)); - - uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); - __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, 0x0f); - if (HEDLEY_UNLIKELY(remaining < 4)) { - r >>= 4 - remaining; - } - - return simde_svbool_from_mmask4(r); - #else - simde_svint64_t r; - - uint_fast64_t remaining = (op1 >= op2) ? 
0 : (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); - - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { - r.values[i] = (remaining-- > 0) ? ~INT64_C(0) : INT64_C(0); - } - - return simde_svbool_from_svint64(r); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svwhilelt_b64_u64 - #define svwhilelt_b64_u64(op1, op2) simde_svwhilelt_b64_u64(op1, op2) -#endif - -#if defined(__cplusplus) - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b8 ( int32_t op1, int32_t op2) { return simde_svwhilelt_b8_s32(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b8 ( int64_t op1, int64_t op2) { return simde_svwhilelt_b8_s64(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b8 (uint32_t op1, uint32_t op2) { return simde_svwhilelt_b8_u32(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b8 (uint64_t op1, uint64_t op2) { return simde_svwhilelt_b8_u64(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b16( int32_t op1, int32_t op2) { return simde_svwhilelt_b16_s32(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b16( int64_t op1, int64_t op2) { return simde_svwhilelt_b16_s64(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b16(uint32_t op1, uint32_t op2) { return simde_svwhilelt_b16_u32(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b16(uint64_t op1, uint64_t op2) { return simde_svwhilelt_b16_u64(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b32( int32_t op1, int32_t op2) { return simde_svwhilelt_b32_s32(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b32( int64_t op1, int64_t op2) { return simde_svwhilelt_b32_s64(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b32(uint32_t op1, uint32_t op2) { return simde_svwhilelt_b32_u32(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b32(uint64_t op1, uint64_t op2) { return simde_svwhilelt_b32_u64(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b64( int32_t op1, int32_t op2) { return simde_svwhilelt_b64_s32(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b64( int64_t op1, int64_t op2) { return simde_svwhilelt_b64_s64(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b64(uint32_t op1, uint32_t op2) { return simde_svwhilelt_b64_u32(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b64(uint64_t op1, uint64_t op2) { return simde_svwhilelt_b64_u64(op1, op2); } -#elif defined(SIMDE_GENERIC_) - #define simde_svwhilelt_b8(op1, op2) \ - (SIMDE_GENERIC_((op1), \ - int32_t: simde_svwhilelt_b8_s32, \ - uint32_t: simde_svwhilelt_b8_u32, \ - int64_t: simde_svwhilelt_b8_s64, \ - uint64_t: simde_svwhilelt_b8_u64)((op1), (op2))) - #define simde_svwhilelt_b16(op1, op2) \ - (SIMDE_GENERIC_((op1), \ - int32_t: simde_svwhilelt_b16_s32, \ - uint32_t: simde_svwhilelt_b16_u32, \ - int64_t: simde_svwhilelt_b16_s64, \ - uint64_t: simde_svwhilelt_b16_u64)((op1), (op2))) - #define simde_svwhilelt_b32(op1, op2) \ - (SIMDE_GENERIC_((op1), \ - int32_t: simde_svwhilelt_b32_s32, \ - uint32_t: simde_svwhilelt_b32_u32, \ - int64_t: simde_svwhilelt_b32_s64, \ - uint64_t: simde_svwhilelt_b32_u64)((op1), (op2))) - #define simde_svwhilelt_b64(op1, op2) \ - (SIMDE_GENERIC_((op1), \ - int32_t: simde_svwhilelt_b64_s32, \ - uint32_t: simde_svwhilelt_b64_u32, \ - 
int64_t: simde_svwhilelt_b64_s64, \ - uint64_t: simde_svwhilelt_b64_u64)((op1), (op2))) -#endif -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef svwhilelt_b8 - #undef svwhilelt_b16 - #undef svwhilelt_b32 - #undef svwhilelt_b64 - #define svwhilelt_b8(op1, op2) simde_svwhilelt_b8((op1), (op2)) - #define svwhilelt_b16(op1, op2) simde_svwhilelt_b16((op1), (op2)) - #define svwhilelt_b32(op1, op2) simde_svwhilelt_b32((op1), (op2)) - #define svwhilelt_b64(op1, op2) simde_svwhilelt_b64((op1), (op2)) -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_SVE_WHILELT_H */ -/* :: End simde/arm/sve/whilelt.h :: */ - -#endif /* SIMDE_ARM_SVE_TYPES_H */ -/* :: End simde/arm/sve/types.h :: */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/add.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_ADD_H) -#define SIMDE_ARM_SVE_ADD_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/sel.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_SEL_H) -#define SIMDE_ARM_SVE_SEL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/reinterpret.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_REINTERPRET_H) -#define SIMDE_ARM_SVE_REINTERPRET_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_ARM_SVE_NATIVE) - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_s16( simde_svint16_t op) { return svreinterpret_s8_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_s32( simde_svint32_t op) { return svreinterpret_s8_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_s64( simde_svint64_t op) { return svreinterpret_s8_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_u8( simde_svuint8_t op) { return svreinterpret_s8_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_u16( simde_svuint16_t op) { return svreinterpret_s8_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_u32( simde_svuint32_t op) { return svreinterpret_s8_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_u64( simde_svuint64_t op) { return svreinterpret_s8_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_f16( simde_svfloat16_t op) { return svreinterpret_s8_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_f32( simde_svfloat32_t op) { return svreinterpret_s8_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_f64( simde_svfloat64_t op) { return svreinterpret_s8_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_s8( simde_svint8_t op) { return svreinterpret_s16_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_s32( simde_svint32_t op) { return svreinterpret_s16_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_s64( simde_svint64_t op) { return svreinterpret_s16_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t 
simde_svreinterpret_s16_u8( simde_svuint8_t op) { return svreinterpret_s16_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_u16( simde_svuint16_t op) { return svreinterpret_s16_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_u32( simde_svuint32_t op) { return svreinterpret_s16_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_u64( simde_svuint64_t op) { return svreinterpret_s16_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_f16( simde_svfloat16_t op) { return svreinterpret_s16_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_f32( simde_svfloat32_t op) { return svreinterpret_s16_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_f64( simde_svfloat64_t op) { return svreinterpret_s16_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_s8( simde_svint8_t op) { return svreinterpret_s32_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_s16( simde_svint16_t op) { return svreinterpret_s32_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_s64( simde_svint64_t op) { return svreinterpret_s32_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_u8( simde_svuint8_t op) { return svreinterpret_s32_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_u16( simde_svuint16_t op) { return svreinterpret_s32_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_u32( simde_svuint32_t op) { return svreinterpret_s32_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_u64( simde_svuint64_t op) { return svreinterpret_s32_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_f16( simde_svfloat16_t op) { return svreinterpret_s32_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_f32( simde_svfloat32_t op) { return svreinterpret_s32_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_f64( simde_svfloat64_t op) { return svreinterpret_s32_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_s8( simde_svint8_t op) { return svreinterpret_s64_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_s16( simde_svint16_t op) { return svreinterpret_s64_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_s32( simde_svint32_t op) { return svreinterpret_s64_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_u8( simde_svuint8_t op) { return svreinterpret_s64_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_u16( simde_svuint16_t op) { return svreinterpret_s64_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_u32( simde_svuint32_t op) { return svreinterpret_s64_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_u64( simde_svuint64_t op) { return svreinterpret_s64_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_f16( simde_svfloat16_t op) { return svreinterpret_s64_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_f32( simde_svfloat32_t op) { return svreinterpret_s64_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_f64( simde_svfloat64_t op) { return svreinterpret_s64_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t 
simde_svreinterpret_u8_s8( simde_svint8_t op) { return svreinterpret_u8_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_s16( simde_svint16_t op) { return svreinterpret_u8_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_s32( simde_svint32_t op) { return svreinterpret_u8_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_s64( simde_svint64_t op) { return svreinterpret_u8_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_u16( simde_svuint16_t op) { return svreinterpret_u8_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_u32( simde_svuint32_t op) { return svreinterpret_u8_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_u64( simde_svuint64_t op) { return svreinterpret_u8_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_f16( simde_svfloat16_t op) { return svreinterpret_u8_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_f32( simde_svfloat32_t op) { return svreinterpret_u8_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_f64( simde_svfloat64_t op) { return svreinterpret_u8_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_s8( simde_svint8_t op) { return svreinterpret_u16_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_s16( simde_svint16_t op) { return svreinterpret_u16_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_s32( simde_svint32_t op) { return svreinterpret_u16_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_s64( simde_svint64_t op) { return svreinterpret_u16_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_u8( simde_svuint8_t op) { return svreinterpret_u16_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_u32( simde_svuint32_t op) { return svreinterpret_u16_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_u64( simde_svuint64_t op) { return svreinterpret_u16_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_f16( simde_svfloat16_t op) { return svreinterpret_u16_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_f32( simde_svfloat32_t op) { return svreinterpret_u16_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_f64( simde_svfloat64_t op) { return svreinterpret_u16_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_s8( simde_svint8_t op) { return svreinterpret_u32_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_s16( simde_svint16_t op) { return svreinterpret_u32_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_s32( simde_svint32_t op) { return svreinterpret_u32_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_s64( simde_svint64_t op) { return svreinterpret_u32_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_u8( simde_svuint8_t op) { return svreinterpret_u32_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_u16( simde_svuint16_t op) { return svreinterpret_u32_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_u64( simde_svuint64_t op) { return svreinterpret_u32_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t 
simde_svreinterpret_u32_f16( simde_svfloat16_t op) { return svreinterpret_u32_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_f32( simde_svfloat32_t op) { return svreinterpret_u32_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_f64( simde_svfloat64_t op) { return svreinterpret_u32_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_s8( simde_svint8_t op) { return svreinterpret_u64_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_s16( simde_svint16_t op) { return svreinterpret_u64_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_s32( simde_svint32_t op) { return svreinterpret_u64_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_s64( simde_svint64_t op) { return svreinterpret_u64_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_u8( simde_svuint8_t op) { return svreinterpret_u64_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_u16( simde_svuint16_t op) { return svreinterpret_u64_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_u32( simde_svuint32_t op) { return svreinterpret_u64_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_f16( simde_svfloat16_t op) { return svreinterpret_u64_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_f32( simde_svfloat32_t op) { return svreinterpret_u64_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_f64( simde_svfloat64_t op) { return svreinterpret_u64_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_s8( simde_svint8_t op) { return svreinterpret_f16_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_s16( simde_svint16_t op) { return svreinterpret_f16_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_s32( simde_svint32_t op) { return svreinterpret_f16_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_s64( simde_svint64_t op) { return svreinterpret_f16_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_u8( simde_svuint8_t op) { return svreinterpret_f16_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_u16( simde_svuint16_t op) { return svreinterpret_f16_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_u32( simde_svuint32_t op) { return svreinterpret_f16_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_u64( simde_svuint64_t op) { return svreinterpret_f16_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_f32( simde_svfloat32_t op) { return svreinterpret_f16_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_f64( simde_svfloat64_t op) { return svreinterpret_f16_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_s8( simde_svint8_t op) { return svreinterpret_f32_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_s16( simde_svint16_t op) { return svreinterpret_f32_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_s32( simde_svint32_t op) { return svreinterpret_f32_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_s64( simde_svint64_t op) { return svreinterpret_f32_s64(op); } - 
SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_u8( simde_svuint8_t op) { return svreinterpret_f32_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_u16( simde_svuint16_t op) { return svreinterpret_f32_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_u32( simde_svuint32_t op) { return svreinterpret_f32_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_u64( simde_svuint64_t op) { return svreinterpret_f32_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_f16( simde_svfloat16_t op) { return svreinterpret_f32_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_f64( simde_svfloat64_t op) { return svreinterpret_f32_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_s8( simde_svint8_t op) { return svreinterpret_f64_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_s16( simde_svint16_t op) { return svreinterpret_f64_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_s32( simde_svint32_t op) { return svreinterpret_f64_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_s64( simde_svint64_t op) { return svreinterpret_f64_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_u8( simde_svuint8_t op) { return svreinterpret_f64_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_u16( simde_svuint16_t op) { return svreinterpret_f64_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_u32( simde_svuint32_t op) { return svreinterpret_f64_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_u64( simde_svuint64_t op) { return svreinterpret_f64_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_f16( simde_svfloat16_t op) { return svreinterpret_f64_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_f32( simde_svfloat32_t op) { return svreinterpret_f64_f32(op); } -#else - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_s16, simde_svint8_t, simde_svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_s32, simde_svint8_t, simde_svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_s64, simde_svint8_t, simde_svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_u8, simde_svint8_t, simde_svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_u16, simde_svint8_t, simde_svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_u32, simde_svint8_t, simde_svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_u64, simde_svint8_t, simde_svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_f16, simde_svint8_t, simde_svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_f32, simde_svint8_t, simde_svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_f64, simde_svint8_t, simde_svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_s8, simde_svint16_t, simde_svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_s32, simde_svint16_t, simde_svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_s64, simde_svint16_t, simde_svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_u8, simde_svint16_t, simde_svuint8_t) - 
SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_u16, simde_svint16_t, simde_svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_u32, simde_svint16_t, simde_svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_u64, simde_svint16_t, simde_svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_f16, simde_svint16_t, simde_svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_f32, simde_svint16_t, simde_svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_f64, simde_svint16_t, simde_svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_s8, simde_svint32_t, simde_svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_s16, simde_svint32_t, simde_svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_s64, simde_svint32_t, simde_svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_u8, simde_svint32_t, simde_svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_u16, simde_svint32_t, simde_svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_u32, simde_svint32_t, simde_svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_u64, simde_svint32_t, simde_svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_f16, simde_svint32_t, simde_svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_f32, simde_svint32_t, simde_svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_f64, simde_svint32_t, simde_svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_s8, simde_svint64_t, simde_svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_s16, simde_svint64_t, simde_svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_s32, simde_svint64_t, simde_svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_u8, simde_svint64_t, simde_svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_u16, simde_svint64_t, simde_svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_u32, simde_svint64_t, simde_svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_u64, simde_svint64_t, simde_svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_f16, simde_svint64_t, simde_svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_f32, simde_svint64_t, simde_svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_f64, simde_svint64_t, simde_svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_s8, simde_svuint8_t, simde_svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_s16, simde_svuint8_t, simde_svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_s32, simde_svuint8_t, simde_svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_s64, simde_svuint8_t, simde_svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_u16, simde_svuint8_t, simde_svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_u32, simde_svuint8_t, simde_svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_u64, simde_svuint8_t, simde_svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_f16, simde_svuint8_t, simde_svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_f32, simde_svuint8_t, simde_svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( 
simde_svreinterpret_u8_f64, simde_svuint8_t, simde_svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_s8, simde_svuint16_t, simde_svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_s16, simde_svuint16_t, simde_svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_s32, simde_svuint16_t, simde_svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_s64, simde_svuint16_t, simde_svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_u8, simde_svuint16_t, simde_svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_u32, simde_svuint16_t, simde_svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_u64, simde_svuint16_t, simde_svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_f16, simde_svuint16_t, simde_svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_f32, simde_svuint16_t, simde_svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_f64, simde_svuint16_t, simde_svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_s8, simde_svuint32_t, simde_svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_s16, simde_svuint32_t, simde_svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_s32, simde_svuint32_t, simde_svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_s64, simde_svuint32_t, simde_svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_u8, simde_svuint32_t, simde_svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_u16, simde_svuint32_t, simde_svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_u64, simde_svuint32_t, simde_svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_f16, simde_svuint32_t, simde_svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_f32, simde_svuint32_t, simde_svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_f64, simde_svuint32_t, simde_svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_s8, simde_svuint64_t, simde_svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_s16, simde_svuint64_t, simde_svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_s32, simde_svuint64_t, simde_svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_s64, simde_svuint64_t, simde_svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_u8, simde_svuint64_t, simde_svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_u16, simde_svuint64_t, simde_svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_u32, simde_svuint64_t, simde_svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_f16, simde_svuint64_t, simde_svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_f32, simde_svuint64_t, simde_svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_f64, simde_svuint64_t, simde_svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_s8, simde_svfloat16_t, simde_svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_s16, simde_svfloat16_t, simde_svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_s32, simde_svfloat16_t, simde_svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_s64, simde_svfloat16_t, simde_svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( 
simde_svreinterpret_f16_u8, simde_svfloat16_t, simde_svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_u16, simde_svfloat16_t, simde_svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_u32, simde_svfloat16_t, simde_svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_u64, simde_svfloat16_t, simde_svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_f32, simde_svfloat16_t, simde_svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_f64, simde_svfloat16_t, simde_svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_s8, simde_svfloat32_t, simde_svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_s16, simde_svfloat32_t, simde_svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_s32, simde_svfloat32_t, simde_svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_s64, simde_svfloat32_t, simde_svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_u8, simde_svfloat32_t, simde_svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_u16, simde_svfloat32_t, simde_svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_u32, simde_svfloat32_t, simde_svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_u64, simde_svfloat32_t, simde_svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_f16, simde_svfloat32_t, simde_svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_f64, simde_svfloat32_t, simde_svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_s8, simde_svfloat64_t, simde_svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_s16, simde_svfloat64_t, simde_svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_s32, simde_svfloat64_t, simde_svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_s64, simde_svfloat64_t, simde_svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_u8, simde_svfloat64_t, simde_svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_u16, simde_svfloat64_t, simde_svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_u32, simde_svfloat64_t, simde_svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_u64, simde_svfloat64_t, simde_svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_f16, simde_svfloat64_t, simde_svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_f32, simde_svfloat64_t, simde_svfloat32_t) -#endif - -#if defined(__cplusplus) - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svint16_t op) { return simde_svreinterpret_s8_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svint32_t op) { return simde_svreinterpret_s8_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svint64_t op) { return simde_svreinterpret_s8_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svuint8_t op) { return simde_svreinterpret_s8_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svuint16_t op) { return simde_svreinterpret_s8_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svuint32_t op) { return simde_svreinterpret_s8_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svuint64_t op) { return 
simde_svreinterpret_s8_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svfloat16_t op) { return simde_svreinterpret_s8_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svfloat32_t op) { return simde_svreinterpret_s8_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svfloat64_t op) { return simde_svreinterpret_s8_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svint8_t op) { return simde_svreinterpret_s16_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svint32_t op) { return simde_svreinterpret_s16_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svint64_t op) { return simde_svreinterpret_s16_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svuint8_t op) { return simde_svreinterpret_s16_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svuint16_t op) { return simde_svreinterpret_s16_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svuint32_t op) { return simde_svreinterpret_s16_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svuint64_t op) { return simde_svreinterpret_s16_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svfloat16_t op) { return simde_svreinterpret_s16_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svfloat32_t op) { return simde_svreinterpret_s16_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svfloat64_t op) { return simde_svreinterpret_s16_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svint8_t op) { return simde_svreinterpret_s32_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svint16_t op) { return simde_svreinterpret_s32_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svint64_t op) { return simde_svreinterpret_s32_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svuint8_t op) { return simde_svreinterpret_s32_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svuint16_t op) { return simde_svreinterpret_s32_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svuint32_t op) { return simde_svreinterpret_s32_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svuint64_t op) { return simde_svreinterpret_s32_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svfloat16_t op) { return simde_svreinterpret_s32_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svfloat32_t op) { return simde_svreinterpret_s32_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svfloat64_t op) { return simde_svreinterpret_s32_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svint8_t op) { return simde_svreinterpret_s64_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svint16_t op) { return simde_svreinterpret_s64_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svint32_t op) { return simde_svreinterpret_s64_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t 
simde_svreinterpret_s64( simde_svuint8_t op) { return simde_svreinterpret_s64_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svuint16_t op) { return simde_svreinterpret_s64_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svuint32_t op) { return simde_svreinterpret_s64_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svuint64_t op) { return simde_svreinterpret_s64_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svfloat16_t op) { return simde_svreinterpret_s64_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svfloat32_t op) { return simde_svreinterpret_s64_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svfloat64_t op) { return simde_svreinterpret_s64_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svint8_t op) { return simde_svreinterpret_u8_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svint16_t op) { return simde_svreinterpret_u8_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svint32_t op) { return simde_svreinterpret_u8_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svint64_t op) { return simde_svreinterpret_u8_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svuint16_t op) { return simde_svreinterpret_u8_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svuint32_t op) { return simde_svreinterpret_u8_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svuint64_t op) { return simde_svreinterpret_u8_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svfloat16_t op) { return simde_svreinterpret_u8_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svfloat32_t op) { return simde_svreinterpret_u8_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svfloat64_t op) { return simde_svreinterpret_u8_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svint8_t op) { return simde_svreinterpret_u16_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svint16_t op) { return simde_svreinterpret_u16_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svint32_t op) { return simde_svreinterpret_u16_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svint64_t op) { return simde_svreinterpret_u16_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svuint8_t op) { return simde_svreinterpret_u16_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svuint32_t op) { return simde_svreinterpret_u16_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svuint64_t op) { return simde_svreinterpret_u16_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svfloat16_t op) { return simde_svreinterpret_u16_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svfloat32_t op) { return simde_svreinterpret_u16_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svfloat64_t op) { return simde_svreinterpret_u16_f64(op); 
} - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svint8_t op) { return simde_svreinterpret_u32_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svint16_t op) { return simde_svreinterpret_u32_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svint32_t op) { return simde_svreinterpret_u32_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svint64_t op) { return simde_svreinterpret_u32_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svuint8_t op) { return simde_svreinterpret_u32_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svuint16_t op) { return simde_svreinterpret_u32_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svuint64_t op) { return simde_svreinterpret_u32_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svfloat16_t op) { return simde_svreinterpret_u32_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svfloat32_t op) { return simde_svreinterpret_u32_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svfloat64_t op) { return simde_svreinterpret_u32_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svint8_t op) { return simde_svreinterpret_u64_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svint16_t op) { return simde_svreinterpret_u64_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svint32_t op) { return simde_svreinterpret_u64_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svint64_t op) { return simde_svreinterpret_u64_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svuint8_t op) { return simde_svreinterpret_u64_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svuint16_t op) { return simde_svreinterpret_u64_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svuint32_t op) { return simde_svreinterpret_u64_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svfloat16_t op) { return simde_svreinterpret_u64_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svfloat32_t op) { return simde_svreinterpret_u64_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svfloat64_t op) { return simde_svreinterpret_u64_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svint8_t op) { return simde_svreinterpret_f16_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svint16_t op) { return simde_svreinterpret_f16_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svint32_t op) { return simde_svreinterpret_f16_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svint64_t op) { return simde_svreinterpret_f16_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svuint8_t op) { return simde_svreinterpret_f16_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svuint16_t op) { return simde_svreinterpret_f16_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t 
simde_svreinterpret_f16( simde_svuint32_t op) { return simde_svreinterpret_f16_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svuint64_t op) { return simde_svreinterpret_f16_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svfloat32_t op) { return simde_svreinterpret_f16_f32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svfloat64_t op) { return simde_svreinterpret_f16_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svint8_t op) { return simde_svreinterpret_f32_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svint16_t op) { return simde_svreinterpret_f32_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svint32_t op) { return simde_svreinterpret_f32_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svint64_t op) { return simde_svreinterpret_f32_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svuint8_t op) { return simde_svreinterpret_f32_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svuint16_t op) { return simde_svreinterpret_f32_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svuint32_t op) { return simde_svreinterpret_f32_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svuint64_t op) { return simde_svreinterpret_f32_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svfloat16_t op) { return simde_svreinterpret_f32_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svfloat64_t op) { return simde_svreinterpret_f32_f64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svint8_t op) { return simde_svreinterpret_f64_s8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svint16_t op) { return simde_svreinterpret_f64_s16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svint32_t op) { return simde_svreinterpret_f64_s32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svint64_t op) { return simde_svreinterpret_f64_s64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svuint8_t op) { return simde_svreinterpret_f64_u8(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svuint16_t op) { return simde_svreinterpret_f64_u16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svuint32_t op) { return simde_svreinterpret_f64_u32(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svuint64_t op) { return simde_svreinterpret_f64_u64(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svfloat16_t op) { return simde_svreinterpret_f64_f16(op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svfloat32_t op) { return simde_svreinterpret_f64_f32(op); } - - #if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, 
svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svfloat32_t) - 
SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svfloat32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( 
svreinterpret_f32, svfloat32_t, svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svfloat64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svuint8_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svuint16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svuint32_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svuint64_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svfloat16_t) - SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svfloat32_t) - #endif /* defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) */ -#elif defined(SIMDE_GENERIC_) - #define simde_svreinterpret_f64(op) \ - (_Generic((op), \ - simde_svint16_t: simde_svreinterpret_s8_s16, \ - simde_svint32_t: simde_svreinterpret_s8_s32, \ - simde_svint64_t: simde_svreinterpret_s8_s64, \ - simde_svuint8_t: simde_svreinterpret_s8_u8, \ - simde_svuint16_t: simde_svreinterpret_s8_u16, \ - simde_svuint32_t: simde_svreinterpret_s8_u32, \ - simde_svuint64_t: simde_svreinterpret_s8_u64, \ - simde_svfloat16_t: simde_svreinterpret_s8_f16, \ - simde_svfloat32_t: simde_svreinterpret_s8_f32, \ - simde_svfloat64_t: simde_svreinterpret_s8_f64)(op)) - #define simde_svreinterpret_s8(op) \ - (_Generic((op), \ - simde_svint8_t: simde_svreinterpret_s16_s8, \ - simde_svint32_t: simde_svreinterpret_s16_s32, \ - simde_svint64_t: simde_svreinterpret_s16_s64, \ - simde_svuint8_t: simde_svreinterpret_s16_u8, \ - simde_svuint16_t: simde_svreinterpret_s16_u16, \ - simde_svuint32_t: simde_svreinterpret_s16_u32, \ - simde_svuint64_t: simde_svreinterpret_s16_u64, \ - simde_svfloat16_t: simde_svreinterpret_s16_f16, \ - simde_svfloat32_t: simde_svreinterpret_s16_f32, \ - simde_svfloat64_t: simde_svreinterpret_s16_f64)(op)) - #define simde_svreinterpret_s16(op) \ - (_Generic((op), \ - simde_svint8_t: simde_svreinterpret_s32_s8, \ - simde_svint16_t: simde_svreinterpret_s32_s16, \ - simde_svint64_t: simde_svreinterpret_s32_s64, \ - simde_svuint8_t: simde_svreinterpret_s32_u8, \ - simde_svuint16_t: simde_svreinterpret_s32_u16, \ - simde_svuint32_t: simde_svreinterpret_s32_u32, \ - simde_svuint64_t: simde_svreinterpret_s32_u64, \ - simde_svfloat16_t: simde_svreinterpret_s32_f16, \ - simde_svfloat32_t: simde_svreinterpret_s32_f32, \ - simde_svfloat64_t: simde_svreinterpret_s32_f64)(op)) - #define simde_svreinterpret_s32(op) \ - (_Generic((op), \ - simde_svint8_t: simde_svreinterpret_s64_s8, \ - simde_svint16_t: simde_svreinterpret_s64_s16, \ - simde_svint32_t: simde_svreinterpret_s64_s32, \ - simde_svuint8_t: simde_svreinterpret_s64_u8, \ - simde_svuint16_t: simde_svreinterpret_s64_u16, \ - simde_svuint32_t: simde_svreinterpret_s64_u32, \ - simde_svuint64_t: simde_svreinterpret_s64_u64, \ - 
simde_svfloat16_t: simde_svreinterpret_s64_f16, \ - simde_svfloat32_t: simde_svreinterpret_s64_f32, \ - simde_svfloat64_t: simde_svreinterpret_s64_f64)(op)) - #define simde_svreinterpret_s64(op) \ - (_Generic((op), \ - simde_svint8_t: simde_svreinterpret_u8_s8, \ - simde_svint16_t: simde_svreinterpret_u8_s16, \ - simde_svint32_t: simde_svreinterpret_u8_s32, \ - simde_svint64_t: simde_svreinterpret_u8_s64, \ - simde_svuint16_t: simde_svreinterpret_u8_u16, \ - simde_svuint32_t: simde_svreinterpret_u8_u32, \ - simde_svuint64_t: simde_svreinterpret_u8_u64, \ - simde_svfloat16_t: simde_svreinterpret_u8_f16, \ - simde_svfloat32_t: simde_svreinterpret_u8_f32, \ - simde_svfloat64_t: simde_svreinterpret_u8_f64)(op)) - #define simde_svreinterpret_u8(op) \ - (_Generic((op), \ - simde_svint8_t: simde_svreinterpret_u16_s8, \ - simde_svint16_t: simde_svreinterpret_u16_s16, \ - simde_svint32_t: simde_svreinterpret_u16_s32, \ - simde_svint64_t: simde_svreinterpret_u16_s64, \ - simde_svuint8_t: simde_svreinterpret_u16_u8, \ - simde_svuint32_t: simde_svreinterpret_u16_u32, \ - simde_svuint64_t: simde_svreinterpret_u16_u64, \ - simde_svfloat16_t: simde_svreinterpret_u16_f16, \ - simde_svfloat32_t: simde_svreinterpret_u16_f32, \ - simde_svfloat64_t: simde_svreinterpret_u16_f64)(op)) - #define simde_svreinterpret_u16(op) \ - (_Generic((op), \ - simde_svint8_t: simde_svreinterpret_u32_s8, \ - simde_svint16_t: simde_svreinterpret_u32_s16, \ - simde_svint32_t: simde_svreinterpret_u32_s32, \ - simde_svint64_t: simde_svreinterpret_u32_s64, \ - simde_svuint8_t: simde_svreinterpret_u32_u8, \ - simde_svuint16_t: simde_svreinterpret_u32_u16, \ - simde_svuint64_t: simde_svreinterpret_u32_u64, \ - simde_svfloat16_t: simde_svreinterpret_u32_f16, \ - simde_svfloat32_t: simde_svreinterpret_u32_f32, \ - simde_svfloat64_t: simde_svreinterpret_u32_f64)(op)) - #define simde_svreinterpret_u32(op) \ - (_Generic((op), \ - simde_svint8_t: simde_svreinterpret_u64_s8, \ - simde_svint16_t: simde_svreinterpret_u64_s16, \ - simde_svint32_t: simde_svreinterpret_u64_s32, \ - simde_svint64_t: simde_svreinterpret_u64_s64, \ - simde_svuint8_t: simde_svreinterpret_u64_u8, \ - simde_svuint16_t: simde_svreinterpret_u64_u16, \ - simde_svuint32_t: simde_svreinterpret_u64_u32, \ - simde_svfloat16_t: simde_svreinterpret_u64_f16, \ - simde_svfloat32_t: simde_svreinterpret_u64_f32, \ - simde_svfloat64_t: simde_svreinterpret_u64_f64)(op)) - #define simde_svreinterpret_u64(op) \ - (_Generic((op), \ - simde_svint8_t: simde_svreinterpret_f16_s8, \ - simde_svint16_t: simde_svreinterpret_f16_s16, \ - simde_svint32_t: simde_svreinterpret_f16_s32, \ - simde_svint64_t: simde_svreinterpret_f16_s64, \ - simde_svuint8_t: simde_svreinterpret_f16_u8, \ - simde_svuint16_t: simde_svreinterpret_f16_u16, \ - simde_svuint32_t: simde_svreinterpret_f16_u32, \ - simde_svuint64_t: simde_svreinterpret_f16_u64, \ - simde_svfloat32_t: simde_svreinterpret_f16_f32, \ - simde_svfloat64_t: simde_svreinterpret_f16_f64)(op)) - #define simde_svreinterpret_f16(op) \ - (_Generic((op), \ - simde_svint8_t: simde_svreinterpret_f32_s8, \ - simde_svint16_t: simde_svreinterpret_f32_s16, \ - simde_svint32_t: simde_svreinterpret_f32_s32, \ - simde_svint64_t: simde_svreinterpret_f32_s64, \ - simde_svuint8_t: simde_svreinterpret_f32_u8, \ - simde_svuint16_t: simde_svreinterpret_f32_u16, \ - simde_svuint32_t: simde_svreinterpret_f32_u32, \ - simde_svuint64_t: simde_svreinterpret_f32_u64, \ - simde_svfloat16_t: simde_svreinterpret_f32_f16, \ - simde_svfloat64_t: simde_svreinterpret_f32_f64)(op)) 
- #define simde_svreinterpret_f32(op) \ - (_Generic((op), \ - simde_svint8_t: simde_svreinterpret_f64_s8, \ - simde_svint16_t: simde_svreinterpret_f64_s16, \ - simde_svint32_t: simde_svreinterpret_f64_s32, \ - simde_svint64_t: simde_svreinterpret_f64_s64, \ - simde_svuint8_t: simde_svreinterpret_f64_u8, \ - simde_svuint16_t: simde_svreinterpret_f64_u16, \ - simde_svuint32_t: simde_svreinterpret_f64_u32, \ - simde_svuint64_t: simde_svreinterpret_f64_u64, \ - simde_svfloat16_t: simde_svreinterpret_f64_f16, \ - simde_svfloat32_t: simde_svreinterpret_f64_f32)(op)) - #if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #define svreinterpret_f64(op) \ - (_Generic((op), \ - svint16_t: svreinterpret_s8_s16, \ - svint32_t: svreinterpret_s8_s32, \ - svint64_t: svreinterpret_s8_s64, \ - svuint8_t: svreinterpret_s8_u8, \ - svuint16_t: svreinterpret_s8_u16, \ - svuint32_t: svreinterpret_s8_u32, \ - svuint64_t: svreinterpret_s8_u64, \ - svfloat16_t: svreinterpret_s8_f16, \ - svfloat32_t: svreinterpret_s8_f32, \ - svfloat64_t: svreinterpret_s8_f64)(op)) - #define svreinterpret_s8(op) \ - (_Generic((op), \ - svint8_t: svreinterpret_s16_s8, \ - svint32_t: svreinterpret_s16_s32, \ - svint64_t: svreinterpret_s16_s64, \ - svuint8_t: svreinterpret_s16_u8, \ - svuint16_t: svreinterpret_s16_u16, \ - svuint32_t: svreinterpret_s16_u32, \ - svuint64_t: svreinterpret_s16_u64, \ - svfloat16_t: svreinterpret_s16_f16, \ - svfloat32_t: svreinterpret_s16_f32, \ - svfloat64_t: svreinterpret_s16_f64)(op)) - #define svreinterpret_s16(op) \ - (_Generic((op), \ - svint8_t: svreinterpret_s32_s8, \ - svint16_t: svreinterpret_s32_s16, \ - svint64_t: svreinterpret_s32_s64, \ - svuint8_t: svreinterpret_s32_u8, \ - svuint16_t: svreinterpret_s32_u16, \ - svuint32_t: svreinterpret_s32_u32, \ - svuint64_t: svreinterpret_s32_u64, \ - svfloat16_t: svreinterpret_s32_f16, \ - svfloat32_t: svreinterpret_s32_f32, \ - svfloat64_t: svreinterpret_s32_f64)(op)) - #define svreinterpret_s32(op) \ - (_Generic((op), \ - svint8_t: svreinterpret_s64_s8, \ - svint16_t: svreinterpret_s64_s16, \ - svint32_t: svreinterpret_s64_s32, \ - svuint8_t: svreinterpret_s64_u8, \ - svuint16_t: svreinterpret_s64_u16, \ - svuint32_t: svreinterpret_s64_u32, \ - svuint64_t: svreinterpret_s64_u64, \ - svfloat16_t: svreinterpret_s64_f16, \ - svfloat32_t: svreinterpret_s64_f32, \ - svfloat64_t: svreinterpret_s64_f64)(op)) - #define svreinterpret_s64(op) \ - (_Generic((op), \ - svint8_t: svreinterpret_u8_s8, \ - svint16_t: svreinterpret_u8_s16, \ - svint32_t: svreinterpret_u8_s32, \ - svint64_t: svreinterpret_u8_s64, \ - svuint16_t: svreinterpret_u8_u16, \ - svuint32_t: svreinterpret_u8_u32, \ - svuint64_t: svreinterpret_u8_u64, \ - svfloat16_t: svreinterpret_u8_f16, \ - svfloat32_t: svreinterpret_u8_f32, \ - svfloat64_t: svreinterpret_u8_f64)(op)) - #define svreinterpret_u8(op) \ - (_Generic((op), \ - svint8_t: svreinterpret_u16_s8, \ - svint16_t: svreinterpret_u16_s16, \ - svint32_t: svreinterpret_u16_s32, \ - svint64_t: svreinterpret_u16_s64, \ - svuint8_t: svreinterpret_u16_u8, \ - svuint32_t: svreinterpret_u16_u32, \ - svuint64_t: svreinterpret_u16_u64, \ - svfloat16_t: svreinterpret_u16_f16, \ - svfloat32_t: svreinterpret_u16_f32, \ - svfloat64_t: svreinterpret_u16_f64)(op)) - #define svreinterpret_u16(op) \ - (_Generic((op), \ - svint8_t: svreinterpret_u32_s8, \ - svint16_t: svreinterpret_u32_s16, \ - svint32_t: svreinterpret_u32_s32, \ - svint64_t: svreinterpret_u32_s64, \ - svuint8_t: svreinterpret_u32_u8, \ - svuint16_t: svreinterpret_u32_u16, \ - svuint64_t: 
svreinterpret_u32_u64, \ - svfloat16_t: svreinterpret_u32_f16, \ - svfloat32_t: svreinterpret_u32_f32, \ - svfloat64_t: svreinterpret_u32_f64)(op)) - #define svreinterpret_u32(op) \ - (_Generic((op), \ - svint8_t: svreinterpret_u64_s8, \ - svint16_t: svreinterpret_u64_s16, \ - svint32_t: svreinterpret_u64_s32, \ - svint64_t: svreinterpret_u64_s64, \ - svuint8_t: svreinterpret_u64_u8, \ - svuint16_t: svreinterpret_u64_u16, \ - svuint32_t: svreinterpret_u64_u32, \ - svfloat16_t: svreinterpret_u64_f16, \ - svfloat32_t: svreinterpret_u64_f32, \ - svfloat64_t: svreinterpret_u64_f64)(op)) - #define svreinterpret_u64(op) \ - (_Generic((op), \ - svint8_t: svreinterpret_f16_s8, \ - svint16_t: svreinterpret_f16_s16, \ - svint32_t: svreinterpret_f16_s32, \ - svint64_t: svreinterpret_f16_s64, \ - svuint8_t: svreinterpret_f16_u8, \ - svuint16_t: svreinterpret_f16_u16, \ - svuint32_t: svreinterpret_f16_u32, \ - svuint64_t: svreinterpret_f16_u64, \ - svfloat32_t: svreinterpret_f16_f32, \ - svfloat64_t: svreinterpret_f16_f64)(op)) - #define svreinterpret_f16(op) \ - (_Generic((op), \ - svint8_t: svreinterpret_f32_s8, \ - svint16_t: svreinterpret_f32_s16, \ - svint32_t: svreinterpret_f32_s32, \ - svint64_t: svreinterpret_f32_s64, \ - svuint8_t: svreinterpret_f32_u8, \ - svuint16_t: svreinterpret_f32_u16, \ - svuint32_t: svreinterpret_f32_u32, \ - svuint64_t: svreinterpret_f32_u64, \ - svfloat16_t: svreinterpret_f32_f16, \ - svfloat64_t: svreinterpret_f32_f64)(op)) - #define svreinterpret_f32(op) \ - (_Generic((op), \ - svint8_t: svreinterpret_f64_s8, \ - svint16_t: svreinterpret_f64_s16, \ - svint32_t: svreinterpret_f64_s32, \ - svint64_t: svreinterpret_f64_s64, \ - svuint8_t: svreinterpret_f64_u8, \ - svuint16_t: svreinterpret_f64_u16, \ - svuint32_t: svreinterpret_f64_u32, \ - svuint64_t: svreinterpret_f64_u64, \ - svfloat16_t: svreinterpret_f64_f16, \ - svfloat32_t: svreinterpret_f64_f32)(op)) - #endif /* defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) */ -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_SVE_REINTERPRET_H */ -/* :: End simde/arm/sve/reinterpret.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_x_svsel_s8_z(simde_svbool_t pg, simde_svint8_t op1) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s8_z(pg, op1, op1); - #else - simde_svint8_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vandq_s8(pg.neon_i8, op1.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_maskz_mov_epi8(simde_svbool_to_mmask64(pg), op1.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_maskz_mov_epi8(simde_svbool_to_mmask32(pg), op1.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_and_si256(pg.m256i[i], op1.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_and_si128(pg.m128i[i], op1.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_and(pg.altivec_b8, op1.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = pg.values_i8 & op1.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = 
wasm_v128_and(pg.v128, op1.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = pg.values_i8 & op1.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = pg.values_i8[i] & op1.values[i]; - } - #endif - - return r; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svsel_s8(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsel_s8(pg, op1, op2); - #else - simde_svint8_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vbslq_s8(pg.neon_u8, op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_mask_mov_epi8(op2.m512i, simde_svbool_to_mmask64(pg), op1.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_mask_mov_epi8(op2.m256i[0], simde_svbool_to_mmask32(pg), op1.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_blendv_epi8(op2.m256i[i], op1.m256i[i], pg.m256i[i]); - } - #elif defined(SIMDE_X86_SSE4_1_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_blendv_epi8(op2.m128i[i], op1.m128i[i], pg.m128i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_or_si128(_mm_and_si128(pg.m128i[i], op1.m128i[i]), _mm_andnot_si128(pg.m128i[i], op2.m128i[i])); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = vec_sel(op2.altivec, op1.altivec, pg.altivec_b8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_bitselect(op1.v128, op2.v128, pg.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = (pg.values_i8 & op1.values) | (~pg.values_i8 & op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = (pg.values_i8[i] & op1.values[i]) | (~pg.values_i8[i] & op2.values[i]); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsel_s8 - #define svsel_s8(pg, op1, op2) simde_svsel_s8(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_x_svsel_s16_z(simde_svbool_t pg, simde_svint16_t op1) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s16_z(pg, op1, op1); - #else - simde_svint16_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vandq_s16(pg.neon_i16, op1.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_maskz_mov_epi16(simde_svbool_to_mmask32(pg), op1.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_maskz_mov_epi16(simde_svbool_to_mmask16(pg), op1.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_and_si256(pg.m256i[i], op1.m256i[i]); - 
} - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_and_si128(pg.m128i[i], op1.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_and(pg.altivec_b16, op1.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = pg.values_i16 & op1.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(pg.v128, op1.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = pg.values_i16 & op1.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = pg.values_i16[i] & op1.values[i]; - } - #endif - - return r; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svsel_s16(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsel_s16(pg, op1, op2); - #else - simde_svint16_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vbslq_s16(pg.neon_u16, op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_mask_mov_epi16(op2.m512i, simde_svbool_to_mmask32(pg), op1.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_mask_mov_epi16(op2.m256i[0], simde_svbool_to_mmask16(pg), op1.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_blendv_epi8(op2.m256i[i], op1.m256i[i], pg.m256i[i]); - } - #elif defined(SIMDE_X86_SSE4_1_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_blendv_epi8(op2.m128i[i], op1.m128i[i], pg.m128i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_or_si128(_mm_and_si128(pg.m128i[i], op1.m128i[i]), _mm_andnot_si128(pg.m128i[i], op2.m128i[i])); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = vec_sel(op2.altivec, op1.altivec, pg.altivec_b16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_bitselect(op1.v128, op2.v128, pg.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = (pg.values_i16 & op1.values) | (~pg.values_i16 & op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = (pg.values_i16[i] & op1.values[i]) | (~pg.values_i16[i] & op2.values[i]); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsel_s16 - #define svsel_s16(pg, op1, op2) simde_svsel_s16(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_x_svsel_s32_z(simde_svbool_t pg, simde_svint32_t op1) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s32_z(pg, op1, op1); - #else - simde_svint32_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vandq_s32(pg.neon_i32, op1.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = 
_mm512_maskz_mov_epi32(simde_svbool_to_mmask16(pg), op1.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_maskz_mov_epi32(simde_svbool_to_mmask8(pg), op1.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_and_si256(pg.m256i[i], op1.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_and_si128(pg.m128i[i], op1.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_and(pg.altivec_b32, op1.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = pg.values_i32 & op1.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(pg.v128, op1.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = pg.values_i32 & op1.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = pg.values_i32[i] & op1.values[i]; - } - #endif - - return r; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svsel_s32(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsel_s32(pg, op1, op2); - #else - simde_svint32_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vbslq_s32(pg.neon_u32, op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_mask_mov_epi32(op2.m512i, simde_svbool_to_mmask16(pg), op1.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_mask_mov_epi32(op2.m256i[0], simde_svbool_to_mmask8(pg), op1.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_blendv_epi8(op2.m256i[i], op1.m256i[i], pg.m256i[i]); - } - #elif defined(SIMDE_X86_SSE4_1_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_blendv_epi8(op2.m128i[i], op1.m128i[i], pg.m128i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_or_si128(_mm_and_si128(pg.m128i[i], op1.m128i[i]), _mm_andnot_si128(pg.m128i[i], op2.m128i[i])); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = vec_sel(op2.altivec, op1.altivec, pg.altivec_b32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_bitselect(op1.v128, op2.v128, pg.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = (pg.values_i32 & op1.values) | (~pg.values_i32 & op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = (pg.values_i32[i] & op1.values[i]) | (~pg.values_i32[i] & op2.values[i]); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsel_s32 - #define svsel_s32(pg, op1, op2) simde_svsel_s32(pg, op1, op2) -#endif - 
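(Aside, not part of the diff: each deleted simde_svsel_* shim above implements a predicated select -- keep op1 in lanes where the predicate pg is set and op2 elsewhere, dispatched to NEON/AVX-512/SSE/AltiVec/WASM blend instructions. For comparison, a minimal sketch of the same operation in plain Armadillo follows; `predicated_select` is a hypothetical helper written for illustration only, not a function in this package, and the RcppArmadillo header is assumed.)

#include <RcppArmadillo.h>

// Hypothetical helper, illustration only: return op1 where pg is nonzero
// and op2 elsewhere, mirroring the svsel semantics of the shims deleted above.
arma::vec predicated_select(const arma::uvec &pg,
                            const arma::vec &op1,
                            const arma::vec &op2) {
  arma::vec r = op2;                       // start from the "false" operand
  arma::uvec idx = arma::find(pg != 0);    // lanes where the predicate is set
  r.elem(idx) = op1.elem(idx);             // overwrite with the "true" operand
  return r;
}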
-SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_x_svsel_s64_z(simde_svbool_t pg, simde_svint64_t op1) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s64_z(pg, op1, op1); - #else - simde_svint64_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vandq_s64(pg.neon_i64, op1.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_maskz_mov_epi64(simde_svbool_to_mmask8(pg), op1.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_maskz_mov_epi64(simde_svbool_to_mmask4(pg), op1.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_and_si256(pg.m256i[i], op1.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_and_si128(pg.m128i[i], op1.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r.altivec = vec_and(pg.altivec_b64, op1.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = HEDLEY_REINTERPRET_CAST(__typeof__(op1.altivec), pg.values_i64) & op1.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(pg.v128, op1.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = pg.values_i64 & op1.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = pg.values_i64[i] & op1.values[i]; - } - #endif - - return r; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svsel_s64(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsel_s64(pg, op1, op2); - #else - simde_svint64_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vbslq_s64(pg.neon_u64, op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m512i = _mm512_mask_mov_epi64(op2.m512i, simde_svbool_to_mmask8(pg), op1.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r.m256i[0] = _mm256_mask_mov_epi64(op2.m256i[0], simde_svbool_to_mmask4(pg), op1.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_blendv_epi8(op2.m256i[i], op1.m256i[i], pg.m256i[i]); - } - #elif defined(SIMDE_X86_SSE4_1_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_blendv_epi8(op2.m128i[i], op1.m128i[i], pg.m128i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_or_si128(_mm_and_si128(pg.m128i[i], op1.m128i[i]), _mm_andnot_si128(pg.m128i[i], op2.m128i[i])); - } - #elif (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && !defined(SIMDE_BUG_CLANG_46770) - r.altivec = vec_sel(op2.altivec, op1.altivec, pg.altivec_b64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = 
wasm_v128_bitselect(op1.v128, op2.v128, pg.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = (pg.values_i64 & op1.values) | (~pg.values_i64 & op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = (pg.values_i64[i] & op1.values[i]) | (~pg.values_i64[i] & op2.values[i]); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsel_s64 - #define svsel_s64(pg, op1, op2) simde_svsel_s64(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_x_svsel_u8_z(simde_svbool_t pg, simde_svuint8_t op1) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u8_z(pg, op1, op1); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - simde_svuint8_t r; - - #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 - r.m512i = _mm512_maskz_mov_epi8(simde_svbool_to_mmask64(pg), op1.m512i); - #else - r.m256i[0] = _mm256_maskz_mov_epi8(simde_svbool_to_mmask32(pg), op1.m256i[0]); - #endif - - return r; - #else - return simde_svreinterpret_u8_s8(simde_x_svsel_s8_z(pg, simde_svreinterpret_s8_u8(op1))); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svsel_u8(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsel_u8(pg, op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - simde_svuint8_t r; - - #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 - r.m512i = _mm512_mask_mov_epi8(op2.m512i, simde_svbool_to_mmask64(pg), op1.m512i); - #else - r.m256i[0] = _mm256_mask_mov_epi8(op2.m256i[0], simde_svbool_to_mmask32(pg), op1.m256i[0]); - #endif - - return r; - #else - return simde_svreinterpret_u8_s8(simde_svsel_s8(pg, simde_svreinterpret_s8_u8(op1), simde_svreinterpret_s8_u8(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsel_u8 - #define svsel_u8(pg, op1, op2) simde_svsel_u8(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_x_svsel_u16_z(simde_svbool_t pg, simde_svuint16_t op1) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u16_z(pg, op1, op1); - #else - return simde_svreinterpret_u16_s16(simde_x_svsel_s16_z(pg, simde_svreinterpret_s16_u16(op1))); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svsel_u16(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsel_u16(pg, op1, op2); - #else - return simde_svreinterpret_u16_s16(simde_svsel_s16(pg, simde_svreinterpret_s16_u16(op1), simde_svreinterpret_s16_u16(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsel_u16 - #define svsel_u16(pg, op1, op2) simde_svsel_u16(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_x_svsel_u32_z(simde_svbool_t pg, simde_svuint32_t op1) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u32_z(pg, op1, op1); - #else - return simde_svreinterpret_u32_s32(simde_x_svsel_s32_z(pg, simde_svreinterpret_s32_u32(op1))); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svsel_u32(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsel_u32(pg, op1, op2); - #else - 
return simde_svreinterpret_u32_s32(simde_svsel_s32(pg, simde_svreinterpret_s32_u32(op1), simde_svreinterpret_s32_u32(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsel_u32 - #define svsel_u32(pg, op1, op2) simde_svsel_u32(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_x_svsel_u64_z(simde_svbool_t pg, simde_svuint64_t op1) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u64_z(pg, op1, op1); - #else - return simde_svreinterpret_u64_s64(simde_x_svsel_s64_z(pg, simde_svreinterpret_s64_u64(op1))); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svsel_u64(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsel_u64(pg, op1, op2); - #else - return simde_svreinterpret_u64_s64(simde_svsel_s64(pg, simde_svreinterpret_s64_u64(op1), simde_svreinterpret_s64_u64(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsel_u64 - #define svsel_u64(pg, op1, op2) simde_svsel_u64(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_x_svsel_f32_z(simde_svbool_t pg, simde_svfloat32_t op1) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return simde_svreinterpret_f32_s32(svand_s32_z(pg, simde_svreinterpret_s32_f32(op1), simde_svreinterpret_s32_f32(op1))); - #else - return simde_svreinterpret_f32_s32(simde_x_svsel_s32_z(pg, simde_svreinterpret_s32_f32(op1))); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svsel_f32(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsel_f32(pg, op1, op2); - #else - return simde_svreinterpret_f32_s32(simde_svsel_s32(pg, simde_svreinterpret_s32_f32(op1), simde_svreinterpret_s32_f32(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsel_f32 - #define svsel_f32(pg, op1, op2) simde_svsel_f32(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_x_svsel_f64_z(simde_svbool_t pg, simde_svfloat64_t op1) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return simde_svreinterpret_f64_s64(svand_s64_z(pg, simde_svreinterpret_s64_f64(op1), simde_svreinterpret_s64_f64(op1))); - #else - return simde_svreinterpret_f64_s64(simde_x_svsel_s64_z(pg, simde_svreinterpret_s64_f64(op1))); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svsel_f64(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsel_f64(pg, op1, op2); - #else - return simde_svreinterpret_f64_s64(simde_svsel_s64(pg, simde_svreinterpret_s64_f64(op1), simde_svreinterpret_s64_f64(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsel_f64 - #define svsel_f64(pg, op1, op2) simde_svsel_f64(pg, op1, op2) -#endif - -#if defined(__cplusplus) - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_x_svsel_z(simde_svbool_t pg, simde_svint8_t op1) { return simde_x_svsel_s8_z (pg, op1); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_x_svsel_z(simde_svbool_t pg, simde_svint16_t op1) { return simde_x_svsel_s16_z(pg, op1); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_x_svsel_z(simde_svbool_t pg, simde_svint32_t op1) { return simde_x_svsel_s32_z(pg, op1); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_x_svsel_z(simde_svbool_t pg, simde_svint64_t op1) { return simde_x_svsel_s64_z(pg, op1); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_x_svsel_z(simde_svbool_t pg, simde_svuint8_t op1) { return 
simde_x_svsel_u8_z (pg, op1); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_x_svsel_z(simde_svbool_t pg, simde_svuint16_t op1) { return simde_x_svsel_u16_z(pg, op1); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_x_svsel_z(simde_svbool_t pg, simde_svuint32_t op1) { return simde_x_svsel_u32_z(pg, op1); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_x_svsel_z(simde_svbool_t pg, simde_svuint64_t op1) { return simde_x_svsel_u64_z(pg, op1); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_x_svsel_z(simde_svbool_t pg, simde_svfloat32_t op1) { return simde_x_svsel_f32_z(pg, op1); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_x_svsel_z(simde_svbool_t pg, simde_svfloat64_t op1) { return simde_x_svsel_f64_z(pg, op1); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsel(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svsel_s8 (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsel(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svsel_s16(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsel(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svsel_s32(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsel(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svsel_s64(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svsel(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svsel_u8 (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsel(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svsel_u16(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsel(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svsel_u32(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsel(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svsel_u64(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsel(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svsel_f32(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsel(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svsel_f64(pg, op1, op2); } -#elif defined(SIMDE_GENERIC_) - #define simde_x_svsel_z(pg, op1) \ - (SIMDE_GENERIC_((op1), \ - simde_svint8_t: simde_x_svsel_s8_z, \ - simde_svint16_t: simde_x_svsel_s16_z, \ - simde_svint32_t: simde_x_svsel_s32_z, \ - simde_svint64_t: simde_x_svsel_s64_z, \ - simde_svuint8_t: simde_x_svsel_u8_z, \ - simde_svuint16_t: simde_x_svsel_u16_z, \ - simde_svuint32_t: simde_x_svsel_u32_z, \ - simde_svuint64_t: simde_x_svsel_u64_z, \ - simde_svfloat32_t: simde_x_svsel_f32_z, \ - simde_svfloat64_t: simde_x_svsel_f64_z)((pg), (op1))) - - #define simde_svsel(pg, op1, op2) \ - (SIMDE_GENERIC_((op1), \ - simde_svint8_t: simde_svsel_s8, \ - simde_svint16_t: simde_svsel_s16, \ - simde_svint32_t: simde_svsel_s32, \ - simde_svint64_t: simde_svsel_s64, \ - simde_svuint8_t: simde_svsel_u8, \ - simde_svuint16_t: simde_svsel_u16, \ - simde_svuint32_t: simde_svsel_u32, \ - simde_svuint64_t: simde_svsel_u64, \ - simde_svfloat32_t: simde_svsel_f32, \ - simde_svfloat64_t: simde_svsel_f64)((pg), (op1), (op2))) -#endif -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef svsel - #define svsel(pg, op1) simde_svsel((pg), (op1)) -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_SVE_SEL_H */ 
-/* :: End simde/arm/sve/sel.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/dup.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_DUP_H) -#define SIMDE_ARM_SVE_DUP_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svdup_n_s8(int8_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_s8(op); - #else - simde_svint8_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vdupq_n_s8(op); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_set1_epi8(op); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_set1_epi8(op); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_set1_epi8(op); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = vec_splats(op); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i8x16_splat(op); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_s8 - #define svdup_n_s8(op) simde_svdup_n_s8((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svdup_s8(int8_t op) { - return simde_svdup_n_s8(op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_s8 - #define svdup_s8(op) simde_svdup_n_s8((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svdup_n_s8_z(simde_svbool_t pg, int8_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_s8_z(pg, op); - #else - return simde_x_svsel_s8_z(pg, simde_svdup_n_s8(op)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_s8_z - #define 
svdup_n_s8_z(pg, op) simde_svdup_n_s8_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svdup_s8_z(simde_svbool_t pg, int8_t op) { - return simde_svdup_n_s8_z(pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_s8_z - #define svdup_s8_z(pg, op) simde_svdup_n_s8_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svdup_n_s8_m(simde_svint8_t inactive, simde_svbool_t pg, int8_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_s8_m(inactive, pg, op); - #else - return simde_svsel_s8(pg, simde_svdup_n_s8(op), inactive); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_s8_m - #define svdup_n_s8_m(inactive, pg, op) simde_svdup_n_s8_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svdup_s8_m(simde_svint8_t inactive, simde_svbool_t pg, int8_t op) { - return simde_svdup_n_s8_m(inactive, pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_s8_m - #define svdup_s8_m(inactive, pg, op) simde_svdup_n_s8_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svdup_n_s16(int16_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_s16(op); - #else - simde_svint16_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vdupq_n_s16(op); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_set1_epi16(op); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_set1_epi16(op); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_set1_epi16(op); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = vec_splats(op); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i16x8_splat(op); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_s16 - #define svdup_n_s16(op) simde_svdup_n_s16((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svdup_s16(int16_t op) { - return simde_svdup_n_s16(op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_s16 - #define svdup_s16(op) simde_svdup_n_s16((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svdup_n_s16_z(simde_svbool_t pg, int16_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_s16_z(pg, op); - #else - return simde_x_svsel_s16_z(pg, simde_svdup_n_s16(op)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_s16_z - #define svdup_n_s16_z(pg, op) simde_svdup_n_s16_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svdup_s16_z(simde_svbool_t pg, int8_t op) { - return simde_svdup_n_s16_z(pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_s16_z - #define svdup_s16_z(pg, op) simde_svdup_n_s16_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svdup_n_s16_m(simde_svint16_t inactive, simde_svbool_t pg, int16_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_s16_m(inactive, pg, op); - #else - return simde_svsel_s16(pg, simde_svdup_n_s16(op), inactive); - #endif -} 
-#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_s16_m - #define svdup_n_s16_m(inactive, pg, op) simde_svdup_n_s16_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svdup_s16_m(simde_svint16_t inactive, simde_svbool_t pg, int16_t op) { - return simde_svdup_n_s16_m(inactive, pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_s16_m - #define svdup_s16_m(inactive, pg, op) simde_svdup_n_s16_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svdup_n_s32(int32_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_s32(op); - #else - simde_svint32_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vdupq_n_s32(op); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_set1_epi32(op); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_set1_epi32(op); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_set1_epi32(op); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = vec_splats(op); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i32x4_splat(op); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_s32 - #define svdup_n_s32(op) simde_svdup_n_s32((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svdup_s32(int8_t op) { - return simde_svdup_n_s32(op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_s32 - #define svdup_s32(op) simde_svdup_n_s32((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svdup_n_s32_z(simde_svbool_t pg, int32_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_s32_z(pg, op); - #else - return simde_x_svsel_s32_z(pg, simde_svdup_n_s32(op)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_s32_z - #define svdup_n_s32_z(pg, op) simde_svdup_n_s32_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svdup_s32_z(simde_svbool_t pg, int32_t op) { - return simde_svdup_n_s32_z(pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_s32_z - #define svdup_s32_z(pg, op) simde_svdup_n_s32_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svdup_n_s32_m(simde_svint32_t inactive, simde_svbool_t pg, int32_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_s32_m(inactive, pg, op); - #else - return simde_svsel_s32(pg, simde_svdup_n_s32(op), inactive); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_s32_m - #define svdup_n_s32_m(inactive, pg, op) simde_svdup_n_s32_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svdup_s32_m(simde_svint32_t inactive, simde_svbool_t pg, int32_t op) { - return simde_svdup_n_s32_m(inactive, pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_s32_m - #define svdup_s32_m(inactive, pg, op) simde_svdup_n_s32_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svdup_n_s64(int64_t op) { - #if 
defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_s64(op); - #else - simde_svint64_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vdupq_n_s64(op); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_set1_epi64(op); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_set1_epi64x(op); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_set1_epi64x(op); - } - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = vec_splats(HEDLEY_STATIC_CAST(signed long long int, op)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i64x2_splat(op); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_s64 - #define svdup_n_s64(op) simde_svdup_n_s64((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svdup_s64(int64_t op) { - return simde_svdup_n_s64(op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_s64 - #define svdup_s64(op) simde_svdup_n_s64((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svdup_n_s64_z(simde_svbool_t pg, int64_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_s64_z(pg, op); - #else - return simde_x_svsel_s64_z(pg, simde_svdup_n_s64(op)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_s64_z - #define svdup_n_s64_z(pg, op) simde_svdup_n_s64_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svdup_s64_z(simde_svbool_t pg, int64_t op) { - return simde_svdup_n_s64_z(pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_s64_z - #define svdup_s64_z(pg, op) simde_svdup_n_f64_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svdup_n_s64_m(simde_svint64_t inactive, simde_svbool_t pg, int64_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_s64_m(inactive, pg, op); - #else - return simde_svsel_s64(pg, simde_svdup_n_s64(op), inactive); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_s64_m - #define svdup_n_s64_m(inactive, pg, op) simde_svdup_n_s64_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svdup_s64_m(simde_svint64_t inactive, simde_svbool_t pg, int64_t op) { - return simde_svdup_n_s64_m(inactive, pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_s64_m - #define svdup_s64_m(inactive, pg, op) simde_svdup_n_s64_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svdup_n_u8(uint8_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_u8(op); - #else - simde_svuint8_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vdupq_n_u8(op); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_set1_epi8(HEDLEY_STATIC_CAST(int8_t, op)); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_set1_epi8(HEDLEY_STATIC_CAST(int8_t, op)); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < 
HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, op)); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = vec_splats(op); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, op)); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_u8 - #define svdup_n_u8(op) simde_svdup_n_u8((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svdup_u8(uint8_t op) { - return simde_svdup_n_u8(op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_u8 - #define svdup_u8(op) simde_svdup_n_u8((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svdup_n_u8_z(simde_svbool_t pg, uint8_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_u8_z(pg, op); - #else - return simde_x_svsel_u8_z(pg, simde_svdup_n_u8(op)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_u8_z - #define svdup_n_u8_z(pg, op) simde_svdup_n_u8_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svdup_u8_z(simde_svbool_t pg, uint8_t op) { - return simde_svdup_n_u8_z(pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_u8_z - #define svdup_u8_z(pg, op) simde_svdup_n_u8_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svdup_n_u8_m(simde_svuint8_t inactive, simde_svbool_t pg, uint8_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_u8_m(inactive, pg, op); - #else - return simde_svsel_u8(pg, simde_svdup_n_u8(op), inactive); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_u8_m - #define svdup_n_u8_m(inactive, pg, op) simde_svdup_n_u8_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svdup_u8_m(simde_svuint8_t inactive, simde_svbool_t pg, uint8_t op) { - return simde_svdup_n_u8_m(inactive, pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_u8_m - #define svdup_u8_m(inactive, pg, op) simde_svdup_n_u8_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svdup_n_u16(uint16_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_u16(op); - #else - simde_svuint16_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vdupq_n_u16(op); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_set1_epi16(HEDLEY_STATIC_CAST(int16_t, op)); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_set1_epi16(HEDLEY_STATIC_CAST(int16_t, op)); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, op)); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = vec_splats(op); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, op)); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op; - } - #endif - - 
return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_u16 - #define svdup_n_u16(op) simde_svdup_n_u16((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svdup_u16(uint16_t op) { - return simde_svdup_n_u16(op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_u16 - #define svdup_u16(op) simde_svdup_n_u16((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svdup_n_u16_z(simde_svbool_t pg, uint16_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_u16_z(pg, op); - #else - return simde_x_svsel_u16_z(pg, simde_svdup_n_u16(op)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_u16_z - #define svdup_n_u16_z(pg, op) simde_svdup_n_u16_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svdup_u16_z(simde_svbool_t pg, uint8_t op) { - return simde_svdup_n_u16_z(pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_u16_z - #define svdup_u16_z(pg, op) simde_svdup_n_u16_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svdup_n_u16_m(simde_svuint16_t inactive, simde_svbool_t pg, uint16_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_u16_m(inactive, pg, op); - #else - return simde_svsel_u16(pg, simde_svdup_n_u16(op), inactive); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_u16_m - #define svdup_n_u16_m(inactive, pg, op) simde_svdup_n_u16_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svdup_u16_m(simde_svuint16_t inactive, simde_svbool_t pg, uint16_t op) { - return simde_svdup_n_u16_m(inactive, pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_u16_m - #define svdup_u16_m(inactive, pg, op) simde_svdup_n_u16_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svdup_n_u32(uint32_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_u32(op); - #else - simde_svuint32_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vdupq_n_u32(op); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_set1_epi32(HEDLEY_STATIC_CAST(int32_t, op)); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_set1_epi32(HEDLEY_STATIC_CAST(int32_t, op)); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, op)); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = vec_splats(op); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i32x4_splat(HEDLEY_STATIC_CAST(int32_t, op)); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_u32 - #define svdup_n_u32(op) simde_svdup_n_u32((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svdup_u32(uint8_t op) { - return simde_svdup_n_u32(op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_u32 - #define svdup_u32(op) simde_svdup_n_u32((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t 
-simde_svdup_n_u32_z(simde_svbool_t pg, uint32_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_u32_z(pg, op); - #else - return simde_x_svsel_u32_z(pg, simde_svdup_n_u32(op)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_u32_z - #define svdup_n_u32_z(pg, op) simde_svdup_n_u32_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svdup_u32_z(simde_svbool_t pg, uint32_t op) { - return simde_svdup_n_u32_z(pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_u32_z - #define svdup_u32_z(pg, op) simde_svdup_n_u32_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svdup_n_u32_m(simde_svuint32_t inactive, simde_svbool_t pg, uint32_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_u32_m(inactive, pg, op); - #else - return simde_svsel_u32(pg, simde_svdup_n_u32(op), inactive); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_u32_m - #define svdup_n_u32_m(inactive, pg, op) simde_svdup_n_u32_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svdup_u32_m(simde_svuint32_t inactive, simde_svbool_t pg, uint32_t op) { - return simde_svdup_n_u32_m(inactive, pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_u32_m - #define svdup_u32_m(inactive, pg, op) simde_svdup_n_u32_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svdup_n_u64(uint64_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_u64(op); - #else - simde_svuint64_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vdupq_n_u64(op); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_set1_epi64(HEDLEY_STATIC_CAST(int64_t, op)); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, op)); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, op)); - } - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = vec_splats(HEDLEY_STATIC_CAST(unsigned long long int, op)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i64x2_splat(HEDLEY_STATIC_CAST(int64_t, op)); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_u64 - #define svdup_n_u64(op) simde_svdup_n_u64((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svdup_u64(uint64_t op) { - return simde_svdup_n_u64(op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_u64 - #define svdup_u64(op) simde_svdup_n_u64((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svdup_n_u64_z(simde_svbool_t pg, uint64_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_u64_z(pg, op); - #else - return simde_x_svsel_u64_z(pg, simde_svdup_n_u64(op)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_u64_z - #define svdup_n_u64_z(pg, op) simde_svdup_n_u64_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svdup_u64_z(simde_svbool_t 
pg, uint64_t op) { - return simde_svdup_n_u64_z(pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_u64_z - #define svdup_u64_z(pg, op) simde_svdup_n_f64_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svdup_n_u64_m(simde_svuint64_t inactive, simde_svbool_t pg, uint64_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_u64_m(inactive, pg, op); - #else - return simde_svsel_u64(pg, simde_svdup_n_u64(op), inactive); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_u64_m - #define svdup_n_u64_m(inactive, pg, op) simde_svdup_n_u64_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svdup_u64_m(simde_svuint64_t inactive, simde_svbool_t pg, uint64_t op) { - return simde_svdup_n_u64_m(inactive, pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_u64_m - #define svdup_u64_m(inactive, pg, op) simde_svdup_n_u64_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svdup_n_f32(simde_float32 op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_f32(op); - #else - simde_svfloat32_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vdupq_n_f32(op); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512 = _mm512_set1_ps(op); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256) / sizeof(r.m256[0])) ; i++) { - r.m256[i] = _mm256_set1_ps(op); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128) / sizeof(r.m128[0])) ; i++) { - r.m128[i] = _mm_set1_ps(op); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r.altivec = vec_splats(op); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_f32x4_splat(op); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_f32 - #define svdup_n_f32(op) simde_svdup_n_f32((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svdup_f32(int8_t op) { - return simde_svdup_n_f32(op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_f32 - #define svdup_f32(op) simde_svdup_n_f32((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svdup_n_f32_z(simde_svbool_t pg, simde_float32 op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_f32_z(pg, op); - #else - return simde_x_svsel_f32_z(pg, simde_svdup_n_f32(op)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_f32_z - #define svdup_n_f32_z(pg, op) simde_svdup_n_f32_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svdup_f32_z(simde_svbool_t pg, simde_float32 op) { - return simde_svdup_n_f32_z(pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_f32_z - #define svdup_f32_z(pg, op) simde_svdup_n_f32_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svdup_n_f32_m(simde_svfloat32_t inactive, simde_svbool_t pg, simde_float32_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_f32_m(inactive, pg, op); - #else - return simde_svsel_f32(pg, simde_svdup_n_f32(op), inactive); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_f32_m - 
#define svdup_n_f32_m(inactive, pg, op) simde_svdup_n_f32_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svdup_f32_m(simde_svfloat32_t inactive, simde_svbool_t pg, simde_float32_t op) { - return simde_svdup_n_f32_m(inactive, pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_f32_m - #define svdup_f32_m(inactive, pg, op) simde_svdup_n_f32_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svdup_n_f64(simde_float64 op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_f64(op); - #else - simde_svfloat64_t r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r.neon = vdupq_n_f64(op); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512d = _mm512_set1_pd(op); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256d) / sizeof(r.m256d[0])) ; i++) { - r.m256d[i] = _mm256_set1_pd(op); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128d) / sizeof(r.m128d[0])) ; i++) { - r.m128d[i] = _mm_set1_pd(op); - } - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = vec_splats(op); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_f64x2_splat(op); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_f64 - #define svdup_n_f64(op) simde_svdup_n_f64((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svdup_f64(simde_float64 op) { - return simde_svdup_n_f64(op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_f64 - #define svdup_f64(op) simde_svdup_n_f64((op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svdup_n_f64_z(simde_svbool_t pg, simde_float64 op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_f64_z(pg, op); - #else - return simde_x_svsel_f64_z(pg, simde_svdup_n_f64(op)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_f64_z - #define svdup_n_f64_z(pg, op) simde_svdup_n_f64_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svdup_f64_z(simde_svbool_t pg, simde_float64 op) { - return simde_svdup_n_f64_z(pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_f64_z - #define svdup_f64_z(pg, op) simde_svdup_n_f64_z((pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svdup_n_f64_m(simde_svfloat64_t inactive, simde_svbool_t pg, simde_float64_t op) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svdup_n_f64_m(inactive, pg, op); - #else - return simde_svsel_f64(pg, simde_svdup_n_f64(op), inactive); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_n_f64_m - #define svdup_n_f64_m(inactive, pg, op) simde_svdup_n_f64_m((inactive), (pg), (op)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svdup_f64_m(simde_svfloat64_t inactive, simde_svbool_t pg, simde_float64_t op) { - return simde_svdup_n_f64_m(inactive, pg, op); -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svdup_f64_m - #define svdup_f64_m(inactive, pg, op) simde_svdup_n_f64_m((inactive), (pg), (op)) -#endif - -#if defined(__cplusplus) - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svdup_n ( int8_t op) { return 
simde_svdup_n_s8 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svdup ( int8_t op) { return simde_svdup_n_s8 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svdup_n_z(simde_svbool_t pg, int8_t op) { return simde_svdup_n_s8_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svdup_z (simde_svbool_t pg, int8_t op) { return simde_svdup_n_s8_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svdup_n ( int16_t op) { return simde_svdup_n_s16 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svdup ( int16_t op) { return simde_svdup_n_s16 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svdup_n_z(simde_svbool_t pg, int16_t op) { return simde_svdup_n_s16_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svdup_z (simde_svbool_t pg, int16_t op) { return simde_svdup_n_s16_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svdup_n ( int32_t op) { return simde_svdup_n_s32 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svdup ( int32_t op) { return simde_svdup_n_s32 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svdup_n_z(simde_svbool_t pg, int32_t op) { return simde_svdup_n_s32_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svdup_z (simde_svbool_t pg, int32_t op) { return simde_svdup_n_s32_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svdup_n ( int64_t op) { return simde_svdup_n_s64 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svdup ( int64_t op) { return simde_svdup_n_s64 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svdup_n_z(simde_svbool_t pg, int64_t op) { return simde_svdup_n_s64_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svdup_z (simde_svbool_t pg, int64_t op) { return simde_svdup_n_s64_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svdup_n ( uint8_t op) { return simde_svdup_n_u8 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svdup ( uint8_t op) { return simde_svdup_n_u8 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svdup_n_z(simde_svbool_t pg, uint8_t op) { return simde_svdup_n_u8_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svdup_z (simde_svbool_t pg, uint8_t op) { return simde_svdup_n_u8_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svdup_n ( uint16_t op) { return simde_svdup_n_u16 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svdup ( uint16_t op) { return simde_svdup_n_u16 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svdup_n_z(simde_svbool_t pg, uint16_t op) { return simde_svdup_n_u16_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svdup_z (simde_svbool_t pg, uint16_t op) { return simde_svdup_n_u16_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svdup_n ( uint32_t op) { return simde_svdup_n_u32 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svdup ( uint32_t op) { return simde_svdup_n_u32 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svdup_n_z(simde_svbool_t pg, uint32_t op) { return simde_svdup_n_u32_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svdup_z (simde_svbool_t pg, uint32_t op) { return simde_svdup_n_u32_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svdup_n ( uint64_t op) { return simde_svdup_n_u64 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svdup ( uint64_t op) { return simde_svdup_n_u64 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svdup_n_z(simde_svbool_t 
pg, uint64_t op) { return simde_svdup_n_u64_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svdup_z (simde_svbool_t pg, uint64_t op) { return simde_svdup_n_u64_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svdup_n ( simde_float32 op) { return simde_svdup_n_f32 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svdup ( simde_float32 op) { return simde_svdup_n_f32 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svdup_n_z(simde_svbool_t pg, simde_float32 op) { return simde_svdup_n_f32_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svdup_z (simde_svbool_t pg, simde_float32 op) { return simde_svdup_n_f32_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svdup_n ( simde_float64 op) { return simde_svdup_n_f64 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svdup ( simde_float64 op) { return simde_svdup_n_f64 ( op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svdup_n_z(simde_svbool_t pg, simde_float64 op) { return simde_svdup_n_f64_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svdup_z (simde_svbool_t pg, simde_float64 op) { return simde_svdup_n_f64_z (pg, op); } - - #if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - SIMDE_FUNCTION_ATTRIBUTES svint8_t svdup_n ( int8_t op) { return svdup_n_s8 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svint8_t svdup ( int8_t op) { return svdup_n_s8 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svint8_t svdup_n_z(svbool_t pg, int8_t op) { return svdup_n_s8_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svint8_t svdup_z (svbool_t pg, int8_t op) { return svdup_n_s8_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svint16_t svdup_n ( int16_t op) { return svdup_n_s16 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svint16_t svdup ( int16_t op) { return svdup_n_s16 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svint16_t svdup_n_z(svbool_t pg, int16_t op) { return svdup_n_s16_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svint16_t svdup_z (svbool_t pg, int16_t op) { return svdup_n_s16_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svint32_t svdup_n ( int32_t op) { return svdup_n_s32 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svint32_t svdup ( int32_t op) { return svdup_n_s32 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svint32_t svdup_n_z(svbool_t pg, int32_t op) { return svdup_n_s32_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svint32_t svdup_z (svbool_t pg, int32_t op) { return svdup_n_s32_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svint64_t svdup_n ( int64_t op) { return svdup_n_s64 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svint64_t svdup ( int64_t op) { return svdup_n_s64 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svint64_t svdup_n_z(svbool_t pg, int64_t op) { return svdup_n_s64_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svint64_t svdup_z (svbool_t pg, int64_t op) { return svdup_n_s64_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svuint8_t svdup_n ( uint8_t op) { return svdup_n_u8 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svuint8_t svdup ( uint8_t op) { return svdup_n_u8 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svuint8_t svdup_n_z(svbool_t pg, uint8_t op) { return svdup_n_u8_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svuint8_t svdup_z (svbool_t pg, uint8_t op) { return svdup_n_u8_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svuint16_t svdup_n ( uint16_t op) { return svdup_n_u16 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svuint16_t svdup ( uint16_t op) { return svdup_n_u16 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svuint16_t svdup_n_z(svbool_t pg, uint16_t op) { return svdup_n_u16_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svuint16_t svdup_z 
(svbool_t pg, uint16_t op) { return svdup_n_u16_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svuint32_t svdup_n ( uint32_t op) { return svdup_n_u32 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svuint32_t svdup ( uint32_t op) { return svdup_n_u32 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svuint32_t svdup_n_z(svbool_t pg, uint32_t op) { return svdup_n_u32_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svuint32_t svdup_z (svbool_t pg, uint32_t op) { return svdup_n_u32_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svuint64_t svdup_n ( uint64_t op) { return svdup_n_u64 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svuint64_t svdup ( uint64_t op) { return svdup_n_u64 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svuint64_t svdup_n_z(svbool_t pg, uint64_t op) { return svdup_n_u64_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svuint64_t svdup_z (svbool_t pg, uint64_t op) { return svdup_n_u64_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svfloat32_t svdup_n ( simde_float32 op) { return svdup_n_f32 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svfloat32_t svdup ( simde_float32 op) { return svdup_n_f32 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svfloat32_t svdup_n_z(svbool_t pg, simde_float32 op) { return svdup_n_f32_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svfloat32_t svdup_z (svbool_t pg, simde_float32 op) { return svdup_n_f32_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svfloat64_t svdup_n ( simde_float64 op) { return svdup_n_f64 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svfloat64_t svdup ( simde_float64 op) { return svdup_n_f64 ( op); } - SIMDE_FUNCTION_ATTRIBUTES svfloat64_t svdup_n_z(svbool_t pg, simde_float64 op) { return svdup_n_f64_z (pg, op); } - SIMDE_FUNCTION_ATTRIBUTES svfloat64_t svdup_z (svbool_t pg, simde_float64 op) { return svdup_n_f64_z (pg, op); } - #endif -#elif defined(SIMDE_GENERIC_) - #define simde_svdup_n(op) \ - (SIMDE_GENERIC_((op), \ - int8_t: simde_svdup_n_s8, \ - int16_t: simde_svdup_n_s16, \ - int32_t: simde_svdup_n_s32, \ - int64_t: simde_svdup_n_s64, \ - uint8_t: simde_svdup_n_u8, \ - uint16_t: simde_svdup_n_u16, \ - uint32_t: simde_svdup_n_u32, \ - uint64_t: simde_svdup_n_u64, \ - float32_t: simde_svdup_n_f32, \ - float64_t: simde_svdup_n_f64)((op))) - #define simde_svdup(op) simde_svdup_n((op)) - - #define simde_svdup_n_z(pg, op) \ - (SIMDE_GENERIC_((op), \ - int8_t: simde_svdup_n_s8_z, \ - int16_t: simde_svdup_n_s16_z, \ - int32_t: simde_svdup_n_s32_z, \ - int64_t: simde_svdup_n_s64_z, \ - uint8_t: simde_svdup_n_s8_z, \ - uint16_t: simde_svdup_n_u16_z, \ - uint32_t: simde_svdup_n_u32_z, \ - uint64_t: simde_svdup_n_u64_z, \ - float32_t: simde_svdup_n_u32_z, \ - float64_t: simde_svdup_n_f64_z)((pg), (op))) - #define simde_svdup_z(pg, op) simde_svdup_n_z((pg), (op)) -#endif -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef svdup - #undef svdup_z - #undef svdup_n - #undef svdup_n_z - #define svdup_n(op) simde_svdup_n((op)) - #define svdup_n_z(pg, op) simde_svdup_n_z((pg), (op)) - #define svdup(op) simde_svdup((op)) - #define svdup_z(pg, op) simde_svdup_z((pg), (op)) -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_SVE_DUP_H */ -/* :: End simde/arm/sve/dup.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svadd_s8_x(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_s8_x(pg, op1, op2); - #else - simde_svint8_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vaddq_s8(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && 
(SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_add_epi8(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_add_epi8(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_add_epi8(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_add_epi8(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_add(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec + op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i8x16_add(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values + op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] + op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_s8_x - #define svadd_s8_x(pg, op1, op2) simde_svadd_s8_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svadd_s8_z(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_s8_z(pg, op1, op2); - #else - return simde_x_svsel_s8_z(pg, simde_svadd_s8_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_s8_z - #define svadd_s8_z(pg, op1, op2) simde_svadd_s8_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svadd_s8_m(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_s8_m(pg, op1, op2); - #else - return simde_svsel_s8(pg, simde_svadd_s8_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_s8_m - #define svadd_s8_m(pg, op1, op2) simde_svadd_s8_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svadd_n_s8_x(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_s8_x(pg, op1, op2); - #else - return simde_svadd_s8_x(pg, op1, simde_svdup_n_s8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_s8_x - #define svadd_n_s8_x(pg, op1, op2) simde_svadd_n_s8_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svadd_n_s8_z(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_s8_z(pg, op1, op2); - #else - return simde_svadd_s8_z(pg, op1, simde_svdup_n_s8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_s8_z - #define svadd_n_s8_z(pg, op1, op2) simde_svadd_n_s8_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svadd_n_s8_m(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_s8_m(pg, op1, op2); - #else - return simde_svadd_s8_m(pg, op1, simde_svdup_n_s8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_s8_m - #define svadd_n_s8_m(pg, op1, op2) simde_svadd_n_s8_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svadd_s16_x(simde_svbool_t pg, 
simde_svint16_t op1, simde_svint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_s16_x(pg, op1, op2); - #else - simde_svint16_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vaddq_s16(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_add_epi16(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_add_epi16(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_add_epi16(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_add_epi16(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_add(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec + op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i16x8_add(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values + op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] + op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_s16_x - #define svadd_s16_x(pg, op1, op2) simde_svadd_s16_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svadd_s16_z(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_s16_z(pg, op1, op2); - #else - return simde_x_svsel_s16_z(pg, simde_svadd_s16_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_s16_z - #define svadd_s16_z(pg, op1, op2) simde_svadd_s16_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svadd_s16_m(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_s16_m(pg, op1, op2); - #else - return simde_svsel_s16(pg, simde_svadd_s16_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_s16_m - #define svadd_s16_m(pg, op1, op2) simde_svadd_s16_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svadd_n_s16_x(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_s16_x(pg, op1, op2); - #else - return simde_svadd_s16_x(pg, op1, simde_svdup_n_s16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_s16_x - #define svadd_n_s16_x(pg, op1, op2) simde_svadd_n_s16_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svadd_n_s16_z(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_s16_z(pg, op1, op2); - #else - return simde_svadd_s16_z(pg, op1, simde_svdup_n_s16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_s16_z - #define svadd_n_s16_z(pg, op1, op2) simde_svadd_n_s16_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svadd_n_s16_m(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { - #if 
defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_s16_m(pg, op1, op2); - #else - return simde_svadd_s16_m(pg, op1, simde_svdup_n_s16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_s16_m - #define svadd_n_s16_m(pg, op1, op2) simde_svadd_n_s16_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svadd_s32_x(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_s32_x(pg, op1, op2); - #else - simde_svint32_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vaddq_s32(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_add_epi32(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_add_epi32(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_add_epi32(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_add_epi32(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_add(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec + op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i32x4_add(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values + op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] + op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_s32_x - #define svadd_s32_x(pg, op1, op2) simde_svadd_s32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svadd_s32_z(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_s32_z(pg, op1, op2); - #else - return simde_x_svsel_s32_z(pg, simde_svadd_s32_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_s32_z - #define svadd_s32_z(pg, op1, op2) simde_svadd_s32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svadd_s32_m(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_s32_m(pg, op1, op2); - #else - return simde_svsel_s32(pg, simde_svadd_s32_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_s32_m - #define svadd_s32_m(pg, op1, op2) simde_svadd_s32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svadd_n_s32_x(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_s32_x(pg, op1, op2); - #else - return simde_svadd_s32_x(pg, op1, simde_svdup_n_s32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_s32_x - #define svadd_n_s32_x(pg, op1, op2) simde_svadd_n_s32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svadd_n_s32_z(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_s32_z(pg, op1, op2); 
- #else - return simde_svadd_s32_z(pg, op1, simde_svdup_n_s32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_s32_z - #define svadd_n_s32_z(pg, op1, op2) simde_svadd_n_s32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svadd_n_s32_m(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_s32_m(pg, op1, op2); - #else - return simde_svadd_s32_m(pg, op1, simde_svdup_n_s32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_s32_m - #define svadd_n_s32_m(pg, op1, op2) simde_svadd_n_s32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svadd_s64_x(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_s64_x(pg, op1, op2); - #else - simde_svint64_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vaddq_s64(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_add_epi64(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_add_epi64(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_add_epi64(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_add_epi64(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r.altivec = vec_add(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec + op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i64x2_add(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values + op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] + op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_s64_x - #define svadd_s64_x(pg, op1, op2) simde_svadd_s64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svadd_s64_z(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_s64_z(pg, op1, op2); - #else - return simde_x_svsel_s64_z(pg, simde_svadd_s64_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_s64_z - #define svadd_s64_z(pg, op1, op2) simde_svadd_s64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svadd_s64_m(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_s64_m(pg, op1, op2); - #else - return simde_svsel_s64(pg, simde_svadd_s64_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_s64_m - #define svadd_s64_m(pg, op1, op2) simde_svadd_s64_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svadd_n_s64_x(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_s64_x(pg, op1, op2); - #else - return simde_svadd_s64_x(pg, op1, simde_svdup_n_s64(op2)); 
- #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_s64_x - #define svadd_n_s64_x(pg, op1, op2) simde_svadd_n_s64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svadd_n_s64_z(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_s64_z(pg, op1, op2); - #else - return simde_svadd_s64_z(pg, op1, simde_svdup_n_s64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_s64_z - #define svadd_n_s64_z(pg, op1, op2) simde_svadd_n_s64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svadd_n_s64_m(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_s64_m(pg, op1, op2); - #else - return simde_svadd_s64_m(pg, op1, simde_svdup_n_s64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_s64_m - #define svadd_n_s64_m(pg, op1, op2) simde_svadd_n_s64_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svadd_u8_x(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_u8_x(pg, op1, op2); - #else - simde_svuint8_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vaddq_u8(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_add_epi8(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_add_epi8(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_add_epi8(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_add_epi8(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_add(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec + op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i8x16_add(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values + op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] + op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_u8_x - #define svadd_u8_x(pg, op1, op2) simde_svadd_u8_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svadd_u8_z(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_u8_z(pg, op1, op2); - #else - return simde_x_svsel_u8_z(pg, simde_svadd_u8_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_u8_z - #define svadd_u8_z(pg, op1, op2) simde_svadd_u8_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svadd_u8_m(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_u8_m(pg, op1, op2); - #else - return simde_svsel_u8(pg, simde_svadd_u8_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef 
simde_svadd_u8_m - #define svadd_u8_m(pg, op1, op2) simde_svadd_u8_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svadd_n_u8_x(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_u8_x(pg, op1, op2); - #else - return simde_svadd_u8_x(pg, op1, simde_svdup_n_u8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_u8_x - #define svadd_n_u8_x(pg, op1, op2) simde_svadd_n_u8_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svadd_n_u8_z(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_u8_z(pg, op1, op2); - #else - return simde_svadd_u8_z(pg, op1, simde_svdup_n_u8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_u8_z - #define svadd_n_u8_z(pg, op1, op2) simde_svadd_n_u8_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svadd_n_u8_m(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_u8_m(pg, op1, op2); - #else - return simde_svadd_u8_m(pg, op1, simde_svdup_n_u8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_u8_m - #define svadd_n_u8_m(pg, op1, op2) simde_svadd_n_u8_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svadd_u16_x(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_u16_x(pg, op1, op2); - #else - simde_svuint16_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vaddq_u16(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_add_epi16(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_add_epi16(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_add_epi16(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_add_epi16(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_add(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec + op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i16x8_add(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values + op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] + op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_u16_x - #define svadd_u16_x(pg, op1, op2) simde_svadd_u16_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svadd_u16_z(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_u16_z(pg, op1, op2); - #else - return simde_x_svsel_u16_z(pg, simde_svadd_u16_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_u16_z - #define svadd_u16_z(pg, op1, op2) simde_svadd_u16_z(pg, op1, op2) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svadd_u16_m(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_u16_m(pg, op1, op2); - #else - return simde_svsel_u16(pg, simde_svadd_u16_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_u16_m - #define svadd_u16_m(pg, op1, op2) simde_svadd_u16_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svadd_n_u16_x(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_u16_x(pg, op1, op2); - #else - return simde_svadd_u16_x(pg, op1, simde_svdup_n_u16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_u16_x - #define svadd_n_u16_x(pg, op1, op2) simde_svadd_n_u16_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svadd_n_u16_z(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_u16_z(pg, op1, op2); - #else - return simde_svadd_u16_z(pg, op1, simde_svdup_n_u16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_u16_z - #define svadd_n_u16_z(pg, op1, op2) simde_svadd_n_u16_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svadd_n_u16_m(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_u16_m(pg, op1, op2); - #else - return simde_svadd_u16_m(pg, op1, simde_svdup_n_u16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_u16_m - #define svadd_n_u16_m(pg, op1, op2) simde_svadd_n_u16_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svadd_u32_x(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_u32_x(pg, op1, op2); - #else - simde_svuint32_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vaddq_u32(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_add_epi32(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_add_epi32(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_add_epi32(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_add_epi32(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_add(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec + op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i32x4_add(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values + op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] + op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_u32_x - #define svadd_u32_x(pg, op1, op2) simde_svadd_u32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t 
-simde_svadd_u32_z(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_u32_z(pg, op1, op2); - #else - return simde_x_svsel_u32_z(pg, simde_svadd_u32_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_u32_z - #define svadd_u32_z(pg, op1, op2) simde_svadd_u32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svadd_u32_m(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_u32_m(pg, op1, op2); - #else - return simde_svsel_u32(pg, simde_svadd_u32_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_u32_m - #define svadd_u32_m(pg, op1, op2) simde_svadd_u32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svadd_n_u32_x(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_u32_x(pg, op1, op2); - #else - return simde_svadd_u32_x(pg, op1, simde_svdup_n_u32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_u32_x - #define svadd_n_u32_x(pg, op1, op2) simde_svadd_n_u32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svadd_n_u32_z(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_u32_z(pg, op1, op2); - #else - return simde_svadd_u32_z(pg, op1, simde_svdup_n_u32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_u32_z - #define svadd_n_u32_z(pg, op1, op2) simde_svadd_n_u32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svadd_n_u32_m(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_u32_m(pg, op1, op2); - #else - return simde_svadd_u32_m(pg, op1, simde_svdup_n_u32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_u32_m - #define svadd_n_u32_m(pg, op1, op2) simde_svadd_n_u32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svadd_u64_x(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_u64_x(pg, op1, op2); - #else - simde_svuint64_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vaddq_u64(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_add_epi64(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_add_epi64(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_add_epi64(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_add_epi64(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r.altivec = vec_add(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec + op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i64x2_add(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values + op2.values; - #else - 
SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] + op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_u64_x - #define svadd_u64_x(pg, op1, op2) simde_svadd_u64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svadd_u64_z(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_u64_z(pg, op1, op2); - #else - return simde_x_svsel_u64_z(pg, simde_svadd_u64_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_u64_z - #define svadd_u64_z(pg, op1, op2) simde_svadd_u64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svadd_u64_m(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_u64_m(pg, op1, op2); - #else - return simde_svsel_u64(pg, simde_svadd_u64_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_u64_m - #define svadd_u64_m(pg, op1, op2) simde_svadd_u64_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svadd_n_u64_x(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_u64_x(pg, op1, op2); - #else - return simde_svadd_u64_x(pg, op1, simde_svdup_n_u64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_u64_x - #define svadd_n_u64_x(pg, op1, op2) simde_svadd_n_u64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svadd_n_u64_z(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_u64_z(pg, op1, op2); - #else - return simde_svadd_u64_z(pg, op1, simde_svdup_n_u64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_u64_z - #define svadd_n_u64_z(pg, op1, op2) simde_svadd_n_u64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svadd_n_u64_m(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_u64_m(pg, op1, op2); - #else - return simde_svadd_u64_m(pg, op1, simde_svdup_n_u64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_u64_m - #define svadd_n_u64_m(pg, op1, op2) simde_svadd_n_u64_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svadd_f32_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_f32_x(pg, op1, op2); - #else - simde_svfloat32_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vaddq_f32(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512 = _mm512_add_ps(op1.m512, op2.m512); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256[0] = _mm256_add_ps(op1.m256[0], op2.m256[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256) / sizeof(r.m256[0])) ; i++) { - r.m256[i] = _mm256_add_ps(op1.m256[i], op2.m256[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128) / sizeof(r.m128[0])) ; i++) { - r.m128[i] = _mm_add_ps(op1.m128[i], op2.m128[i]); - } - 
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_add(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec + op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_f32x4_add(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values + op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] + op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_f32_x - #define svadd_f32_x(pg, op1, op2) simde_svadd_f32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svadd_f32_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_f32_z(pg, op1, op2); - #else - return simde_x_svsel_f32_z(pg, simde_svadd_f32_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_f32_z - #define svadd_f32_z(pg, op1, op2) simde_svadd_f32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svadd_f32_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_f32_m(pg, op1, op2); - #else - return simde_svsel_f32(pg, simde_svadd_f32_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_f32_m - #define svadd_f32_m(pg, op1, op2) simde_svadd_f32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svadd_n_f32_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_f32_x(pg, op1, op2); - #else - return simde_svadd_f32_x(pg, op1, simde_svdup_n_f32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_f32_x - #define svadd_n_f32_x(pg, op1, op2) simde_svadd_n_f32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svadd_n_f32_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_f32_z(pg, op1, op2); - #else - return simde_svadd_f32_z(pg, op1, simde_svdup_n_f32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_f32_z - #define svadd_n_f32_z(pg, op1, op2) simde_svadd_n_f32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svadd_n_f32_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_f32_m(pg, op1, op2); - #else - return simde_svadd_f32_m(pg, op1, simde_svdup_n_f32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_f32_m - #define svadd_n_f32_m(pg, op1, op2) simde_svadd_n_f32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svadd_f64_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_f64_x(pg, op1, op2); - #else - simde_svfloat64_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r.neon = vaddq_f64(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512d = _mm512_add_pd(op1.m512d, op2.m512d); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256d[0] = 
_mm256_add_pd(op1.m256d[0], op2.m256d[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256d) / sizeof(r.m256d[0])) ; i++) { - r.m256d[i] = _mm256_add_pd(op1.m256d[i], op2.m256d[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128d) / sizeof(r.m128d[0])) ; i++) { - r.m128d[i] = _mm_add_pd(op1.m128d[i], op2.m128d[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r.altivec = vec_add(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec + op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_f64x2_add(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values + op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] + op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_f64_x - #define svadd_f64_x(pg, op1, op2) simde_svadd_f64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svadd_f64_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_f64_z(pg, op1, op2); - #else - return simde_x_svsel_f64_z(pg, simde_svadd_f64_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_f64_z - #define svadd_f64_z(pg, op1, op2) simde_svadd_f64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svadd_f64_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_f64_m(pg, op1, op2); - #else - return simde_svsel_f64(pg, simde_svadd_f64_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_f64_m - #define svadd_f64_m(pg, op1, op2) simde_svadd_f64_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svadd_n_f64_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_f64_x(pg, op1, op2); - #else - return simde_svadd_f64_x(pg, op1, simde_svdup_n_f64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_f64_x - #define svadd_n_f64_x(pg, op1, op2) simde_svadd_n_f64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svadd_n_f64_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_f64_z(pg, op1, op2); - #else - return simde_svadd_f64_z(pg, op1, simde_svdup_n_f64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_f64_z - #define svadd_n_f64_z(pg, op1, op2) simde_svadd_n_f64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svadd_n_f64_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svadd_n_f64_m(pg, op1, op2); - #else - return simde_svadd_f64_m(pg, op1, simde_svdup_n_f64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svadd_n_f64_m - #define svadd_n_f64_m(pg, op1, op2) simde_svadd_n_f64_m(pg, op1, op2) -#endif - -#if defined(__cplusplus) - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svadd_x(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svadd_s8_x (pg, 
op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svadd_x(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svadd_s16_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svadd_x(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svadd_s32_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svadd_x(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svadd_s64_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svadd_x(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svadd_u8_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svadd_x(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svadd_u16_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svadd_x(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svadd_u32_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svadd_x(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svadd_u64_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svadd_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svadd_f32_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svadd_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svadd_f64_x (pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svadd_z(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svadd_s8_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svadd_z(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svadd_s16_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svadd_z(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svadd_s32_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svadd_z(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svadd_s64_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svadd_z(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svadd_u8_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svadd_z(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svadd_u16_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svadd_z(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svadd_u32_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svadd_z(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svadd_u64_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svadd_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svadd_f32_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svadd_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svadd_f64_z (pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svadd_m(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svadd_s8_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svadd_m(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svadd_s16_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t 
simde_svadd_m(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svadd_s32_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svadd_m(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svadd_s64_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svadd_m(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svadd_u8_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svadd_m(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svadd_u16_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svadd_m(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svadd_u32_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svadd_m(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svadd_u64_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svadd_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svadd_f32_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svadd_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svadd_f64_m (pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svadd_x(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svadd_n_s8_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svadd_x(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svadd_n_s16_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svadd_x(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svadd_n_s32_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svadd_x(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svadd_n_s64_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svadd_x(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svadd_n_u8_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svadd_x(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svadd_n_u16_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svadd_x(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svadd_n_u32_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svadd_x(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svadd_n_u64_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svadd_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { return simde_svadd_n_f32_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svadd_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { return simde_svadd_n_f64_x(pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svadd_z(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svadd_n_s8_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svadd_z(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svadd_n_s16_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svadd_z(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svadd_n_s32_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svadd_z(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svadd_n_s64_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES 
simde_svuint8_t simde_svadd_z(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svadd_n_u8_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svadd_z(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svadd_n_u16_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svadd_z(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svadd_n_u32_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svadd_z(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svadd_n_u64_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svadd_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { return simde_svadd_n_f32_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svadd_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { return simde_svadd_n_f64_z(pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svadd_m(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svadd_n_s8_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svadd_m(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svadd_n_s16_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svadd_m(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svadd_n_s32_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svadd_m(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svadd_n_s64_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svadd_m(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svadd_n_u8_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svadd_m(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svadd_n_u16_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svadd_m(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svadd_n_u32_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svadd_m(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svadd_n_u64_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svadd_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { return simde_svadd_n_f32_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svadd_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { return simde_svadd_n_f64_m(pg, op1, op2); } -#elif defined(SIMDE_GENERIC_) - #define simde_svadd_x(pg, op1, op2) \ - (SIMDE_GENERIC_((op2), \ - simde_svint8_t: simde_svadd_s8_x, \ - simde_svint16_t: simde_svadd_s16_x, \ - simde_svint32_t: simde_svadd_s32_x, \ - simde_svint64_t: simde_svadd_s64_x, \ - simde_svuint8_t: simde_svadd_u8_x, \ - simde_svuint16_t: simde_svadd_u16_x, \ - simde_svuint32_t: simde_svadd_u32_x, \ - simde_svuint64_t: simde_svadd_u64_x, \ - simde_svfloat32_t: simde_svadd_f32_x, \ - simde_svfloat64_t: simde_svadd_f64_x, \ - int8_t: simde_svadd_n_s8_x, \ - int16_t: simde_svadd_n_s16_x, \ - int32_t: simde_svadd_n_s32_x, \ - int64_t: simde_svadd_n_s64_x, \ - uint8_t: simde_svadd_n_u8_x, \ - uint16_t: simde_svadd_n_u16_x, \ - uint32_t: simde_svadd_n_u32_x, \ - uint64_t: simde_svadd_n_u64_x, \ - simde_float32: simde_svadd_n_f32_x, \ - simde_float64: simde_svadd_n_f64_x)((pg), (op1), (op2))) - - #define simde_svadd_z(pg, op1, op2) \ - (SIMDE_GENERIC_((op2), \ - simde_svint8_t: simde_svadd_s8_z, \ - simde_svint16_t: 
simde_svadd_s16_z, \ - simde_svint32_t: simde_svadd_s32_z, \ - simde_svint64_t: simde_svadd_s64_z, \ - simde_svuint8_t: simde_svadd_u8_z, \ - simde_svuint16_t: simde_svadd_u16_z, \ - simde_svuint32_t: simde_svadd_u32_z, \ - simde_svuint64_t: simde_svadd_u64_z, \ - simde_svfloat32_t: simde_svadd_f32_z, \ - simde_svfloat64_t: simde_svadd_f64_z, \ - int8_t: simde_svadd_n_s8_z, \ - int16_t: simde_svadd_n_s16_z, \ - int32_t: simde_svadd_n_s32_z, \ - int64_t: simde_svadd_n_s64_z, \ - uint8_t: simde_svadd_n_u8_z, \ - uint16_t: simde_svadd_n_u16_z, \ - uint32_t: simde_svadd_n_u32_z, \ - uint64_t: simde_svadd_n_u64_z, \ - simde_float32: simde_svadd_n_f32_z, \ - simde_float64: simde_svadd_n_f64_z)((pg), (op1), (op2))) - - #define simde_svadd_m(pg, op1, op2) \ - (SIMDE_GENERIC_((op2), \ - simde_svint8_t: simde_svadd_s8_m, \ - simde_svint16_t: simde_svadd_s16_m, \ - simde_svint32_t: simde_svadd_s32_m, \ - simde_svint64_t: simde_svadd_s64_m, \ - simde_svuint8_t: simde_svadd_u8_m, \ - simde_svuint16_t: simde_svadd_u16_m, \ - simde_svuint32_t: simde_svadd_u32_m, \ - simde_svuint64_t: simde_svadd_u64_m, \ - simde_svfloat32_t: simde_svadd_f32_m, \ - simde_svfloat64_t: simde_svadd_f64_m, \ - int8_t: simde_svadd_n_s8_m, \ - int16_t: simde_svadd_n_s16_m, \ - int32_t: simde_svadd_n_s32_m, \ - int64_t: simde_svadd_n_s64_m, \ - uint8_t: simde_svadd_n_u8_m, \ - uint16_t: simde_svadd_n_u16_m, \ - uint32_t: simde_svadd_n_u32_m, \ - uint64_t: simde_svadd_n_u64_m, \ - simde_float32: simde_svadd_n_f32_m, \ - simde_float64: simde_svadd_n_f64_m)((pg), (op1), (op2))) -#endif -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef svadd_x - #undef svadd_z - #undef svadd_m - #undef svadd_n_x - #undef svadd_n_z - #undef svadd_n_m - #define svadd_x(pg, op1, op2) simde_svadd_x((pg), (op1), (op2)) - #define svadd_z(pg, op1, op2) simde_svadd_z((pg), (op1), (op2)) - #define svadd_m(pg, op1, op2) simde_svadd_m((pg), (op1), (op2)) - #define svadd_n_x(pg, op1, op2) simde_svadd_n_x((pg), (op1), (op2)) - #define svadd_n_z(pg, op1, op2) simde_svadd_n_z((pg), (op1), (op2)) - #define svadd_n_m(pg, op1, op2) simde_svadd_n_m((pg), (op1), (op2)) -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_SVE_ADD_H */ -/* :: End simde/arm/sve/add.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/and.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_AND_H) -#define SIMDE_ARM_SVE_AND_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svand_s8_x(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s8_x(pg, op1, op2); - #else - simde_svint8_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vandq_s8(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_and_si512(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_and_si256(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_and_si256(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_and_si128(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_and(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec & op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values & op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] & op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_s8_x - #define svand_s8_x(pg, op1, op2) simde_svand_s8_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svand_s8_z(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s8_z(pg, op1, op2); - #else - return simde_x_svsel_s8_z(pg, simde_svand_s8_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_s8_z - #define svand_s8_z(pg, op1, op2) simde_svand_s8_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svand_s8_m(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s8_m(pg, op1, op2); - #else - return simde_svsel_s8(pg, simde_svand_s8_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_s8_m - #define svand_s8_m(pg, op1, op2) simde_svand_s8_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svand_n_s8_z(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_s8_z(pg, op1, op2); - #else - return simde_svand_s8_z(pg, op1, simde_svdup_n_s8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_s8_z - #define svand_n_s8_z(pg, op1, op2) simde_svand_n_s8_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svand_n_s8_m(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return 
svand_n_s8_m(pg, op1, op2); - #else - return simde_svand_s8_m(pg, op1, simde_svdup_n_s8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_s8_m - #define svand_n_s8_m(pg, op1, op2) simde_svand_n_s8_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svand_n_s8_x(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_s8_x(pg, op1, op2); - #else - return simde_svand_s8_x(pg, op1, simde_svdup_n_s8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_s8_x - #define svand_n_s8_x(pg, op1, op2) simde_svand_n_s8_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svand_s16_x(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s16_x(pg, op1, op2); - #else - simde_svint16_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vandq_s16(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_and_si512(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_and_si256(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_and_si256(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_and_si128(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_and(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec & op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values & op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] & op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_s16_x - #define svand_s16_x(pg, op1, op2) simde_svand_s16_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svand_s16_z(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s16_z(pg, op1, op2); - #else - return simde_x_svsel_s16_z(pg, simde_svand_s16_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_s16_z - #define svand_s16_z(pg, op1, op2) simde_svand_s16_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svand_s16_m(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s16_m(pg, op1, op2); - #else - return simde_svsel_s16(pg, simde_svand_s16_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_s16_m - #define svand_s16_m(pg, op1, op2) simde_svand_s16_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svand_n_s16_z(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_s16_z(pg, op1, op2); - #else - return simde_svand_s16_z(pg, op1, 
simde_svdup_n_s16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_s16_z - #define svand_n_s16_z(pg, op1, op2) simde_svand_n_s16_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svand_n_s16_m(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_s16_m(pg, op1, op2); - #else - return simde_svand_s16_m(pg, op1, simde_svdup_n_s16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_s16_m - #define svand_n_s16_m(pg, op1, op2) simde_svand_n_s16_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svand_n_s16_x(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_s16_x(pg, op1, op2); - #else - return simde_svand_s16_x(pg, op1, simde_svdup_n_s16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_s16_x - #define svand_n_s16_x(pg, op1, op2) simde_svand_n_s16_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svand_s32_x(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s32_x(pg, op1, op2); - #else - simde_svint32_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vandq_s32(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_and_si512(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_and_si256(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_and_si256(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_and_si128(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_and(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec & op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values & op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] & op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_s32_x - #define svand_s32_x(pg, op1, op2) simde_svand_s32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svand_s32_z(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s32_z(pg, op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - simde_svint32_t r; - - #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 - r.m512i = _mm512_maskz_and_epi32(simde_svbool_to_mmask16(pg), op1.m512i, op2.m512i); - #else - r.m256i[0] = _mm256_maskz_and_epi32(simde_svbool_to_mmask8(pg), op1.m256i[0], op2.m256i[0]); - #endif - - return r; - #else - return simde_x_svsel_s32_z(pg, simde_svand_s32_x(pg, op1, op2)); - 
#endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_s32_z - #define svand_s32_z(pg, op1, op2) simde_svand_s32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svand_s32_m(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s32_m(pg, op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - simde_svint32_t r; - - #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 - r.m512i = _mm512_mask_and_epi32(op1.m512i, simde_svbool_to_mmask16(pg), op1.m512i, op2.m512i); - #else - r.m256i[0] = _mm256_mask_and_epi32(op1.m256i[0], simde_svbool_to_mmask8(pg), op1.m256i[0], op2.m256i[0]); - #endif - - return r; - #else - return simde_svsel_s32(pg, simde_svand_s32_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_s32_m - #define svand_s32_m(pg, op1, op2) simde_svand_s32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svand_n_s32_z(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_s32_z(pg, op1, op2); - #else - return simde_svand_s32_z(pg, op1, simde_svdup_n_s32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_s32_z - #define svand_n_s32_z(pg, op1, op2) simde_svand_n_s32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svand_n_s32_m(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_s32_m(pg, op1, op2); - #else - return simde_svand_s32_m(pg, op1, simde_svdup_n_s32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_s32_m - #define svand_n_s32_m(pg, op1, op2) simde_svand_n_s32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svand_n_s32_x(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_s32_x(pg, op1, op2); - #else - return simde_svand_s32_x(pg, op1, simde_svdup_n_s32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_s32_x - #define svand_n_s32_x(pg, op1, op2) simde_svand_n_s32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svand_s64_x(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s64_x(pg, op1, op2); - #else - simde_svint64_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vandq_s64(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_and_si512(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_and_si256(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_and_si256(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_and_si128(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r.altivec = vec_and(op1.altivec, op2.altivec); - #elif 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec & op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values & op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] & op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_s64_x - #define svand_s64_x(pg, op1, op2) simde_svand_s64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svand_s64_z(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s64_z(pg, op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - simde_svint64_t r; - - #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 - r.m512i = _mm512_maskz_and_epi64(simde_svbool_to_mmask8(pg), op1.m512i, op2.m512i); - #else - r.m256i[0] = _mm256_maskz_and_epi64(simde_svbool_to_mmask4(pg), op1.m256i[0], op2.m256i[0]); - #endif - - return r; - #else - return simde_x_svsel_s64_z(pg, simde_svand_s64_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_s64_z - #define svand_s64_z(pg, op1, op2) simde_svand_s64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svand_s64_m(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_s64_m(pg, op1, op2); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - simde_svint64_t r; - - #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 - r.m512i = _mm512_mask_and_epi64(op1.m512i, simde_svbool_to_mmask8(pg), op1.m512i, op2.m512i); - #else - r.m256i[0] = _mm256_mask_and_epi64(op1.m256i[0], simde_svbool_to_mmask4(pg), op1.m256i[0], op2.m256i[0]); - #endif - - return r; - #else - return simde_svsel_s64(pg, simde_svand_s64_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_s64_m - #define svand_s64_m(pg, op1, op2) simde_svand_s64_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svand_n_s64_z(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_s64_z(pg, op1, op2); - #else - return simde_svand_s64_z(pg, op1, simde_svdup_n_s64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_s64_z - #define svand_n_s64_z(pg, op1, op2) simde_svand_n_s64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svand_n_s64_m(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_s64_m(pg, op1, op2); - #else - return simde_svand_s64_m(pg, op1, simde_svdup_n_s64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_s64_m - #define svand_n_s64_m(pg, op1, op2) simde_svand_n_s64_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svand_n_s64_x(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_s64_x(pg, op1, op2); - #else - return 
simde_svand_s64_x(pg, op1, simde_svdup_n_s64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_s64_x - #define svand_n_s64_x(pg, op1, op2) simde_svand_n_s64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svand_u8_z(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u8_z(pg, op1, op2); - #else - return simde_svreinterpret_u8_s8(simde_svand_s8_z(pg, simde_svreinterpret_s8_u8(op1), simde_svreinterpret_s8_u8(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_u8_z - #define svand_u8_z(pg, op1, op2) simde_svand_u8_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svand_u8_m(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u8_m(pg, op1, op2); - #else - return simde_svreinterpret_u8_s8(simde_svand_s8_m(pg, simde_svreinterpret_s8_u8(op1), simde_svreinterpret_s8_u8(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_u8_m - #define svand_u8_m(pg, op1, op2) simde_svand_u8_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svand_u8_x(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u8_x(pg, op1, op2); - #else - return simde_svreinterpret_u8_s8(simde_svand_s8_x(pg, simde_svreinterpret_s8_u8(op1), simde_svreinterpret_s8_u8(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_u8_x - #define svand_u8_x(pg, op1, op2) simde_svand_u8_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svand_n_u8_z(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_u8_z(pg, op1, op2); - #else - return simde_svand_u8_z(pg, op1, simde_svdup_n_u8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_u8_z - #define svand_n_u8_z(pg, op1, op2) simde_svand_n_u8_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svand_n_u8_m(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_u8_m(pg, op1, op2); - #else - return simde_svand_u8_m(pg, op1, simde_svdup_n_u8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_u8_m - #define svand_n_u8_m(pg, op1, op2) simde_svand_n_u8_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svand_n_u8_x(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_u8_x(pg, op1, op2); - #else - return simde_svand_u8_x(pg, op1, simde_svdup_n_u8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_u8_x - #define svand_n_u8_x(pg, op1, op2) simde_svand_n_u8_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svand_u16_z(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u16_z(pg, op1, op2); - #else - return simde_svreinterpret_u16_s16(simde_svand_s16_z(pg, simde_svreinterpret_s16_u16(op1), simde_svreinterpret_s16_u16(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_u16_z - #define svand_u16_z(pg, op1, op2) simde_svand_u16_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t 
-simde_svand_u16_m(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u16_m(pg, op1, op2); - #else - return simde_svreinterpret_u16_s16(simde_svand_s16_m(pg, simde_svreinterpret_s16_u16(op1), simde_svreinterpret_s16_u16(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_u16_m - #define svand_u16_m(pg, op1, op2) simde_svand_u16_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svand_u16_x(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u16_x(pg, op1, op2); - #else - return simde_svreinterpret_u16_s16(simde_svand_s16_x(pg, simde_svreinterpret_s16_u16(op1), simde_svreinterpret_s16_u16(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_u16_x - #define svand_u16_x(pg, op1, op2) simde_svand_u16_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svand_n_u16_z(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_u16_z(pg, op1, op2); - #else - return simde_svand_u16_z(pg, op1, simde_svdup_n_u16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_u16_z - #define svand_n_u16_z(pg, op1, op2) simde_svand_n_u16_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svand_n_u16_m(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_u16_m(pg, op1, op2); - #else - return simde_svand_u16_m(pg, op1, simde_svdup_n_u16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_u16_m - #define svand_n_u16_m(pg, op1, op2) simde_svand_n_u16_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svand_n_u16_x(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_u16_x(pg, op1, op2); - #else - return simde_svand_u16_x(pg, op1, simde_svdup_n_u16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_u16_x - #define svand_n_u16_x(pg, op1, op2) simde_svand_n_u16_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svand_u32_z(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u32_z(pg, op1, op2); - #else - return simde_svreinterpret_u32_s32(simde_svand_s32_z(pg, simde_svreinterpret_s32_u32(op1), simde_svreinterpret_s32_u32(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_u32_z - #define svand_u32_z(pg, op1, op2) simde_svand_u32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svand_u32_m(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u32_m(pg, op1, op2); - #else - return simde_svreinterpret_u32_s32(simde_svand_s32_m(pg, simde_svreinterpret_s32_u32(op1), simde_svreinterpret_s32_u32(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_u32_m - #define svand_u32_m(pg, op1, op2) simde_svand_u32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svand_u32_x(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u32_x(pg, op1, op2); - #else - return 
simde_svreinterpret_u32_s32(simde_svand_s32_x(pg, simde_svreinterpret_s32_u32(op1), simde_svreinterpret_s32_u32(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_u32_x - #define svand_u32_x(pg, op1, op2) simde_svand_u32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svand_n_u32_z(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_u32_z(pg, op1, op2); - #else - return simde_svand_u32_z(pg, op1, simde_svdup_n_u32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_u32_z - #define svand_n_u32_z(pg, op1, op2) simde_svand_n_u32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svand_n_u32_m(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_u32_m(pg, op1, op2); - #else - return simde_svand_u32_m(pg, op1, simde_svdup_n_u32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_u32_m - #define svand_n_u32_m(pg, op1, op2) simde_svand_n_u32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svand_n_u32_x(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_u32_x(pg, op1, op2); - #else - return simde_svand_u32_x(pg, op1, simde_svdup_n_u32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_u32_x - #define svand_n_u32_x(pg, op1, op2) simde_svand_n_u32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svand_u64_z(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u64_z(pg, op1, op2); - #else - return simde_svreinterpret_u64_s64(simde_svand_s64_z(pg, simde_svreinterpret_s64_u64(op1), simde_svreinterpret_s64_u64(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_u64_z - #define svand_u64_z(pg, op1, op2) simde_svand_u64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svand_u64_m(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u64_m(pg, op1, op2); - #else - return simde_svreinterpret_u64_s64(simde_svand_s64_m(pg, simde_svreinterpret_s64_u64(op1), simde_svreinterpret_s64_u64(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_u64_m - #define svand_u64_m(pg, op1, op2) simde_svand_u64_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svand_u64_x(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_u64_x(pg, op1, op2); - #else - return simde_svreinterpret_u64_s64(simde_svand_s64_x(pg, simde_svreinterpret_s64_u64(op1), simde_svreinterpret_s64_u64(op2))); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_u64_x - #define svand_u64_x(pg, op1, op2) simde_svand_u64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svand_n_u64_z(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_u64_z(pg, op1, op2); - #else - return simde_svand_u64_z(pg, op1, simde_svdup_n_u64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_u64_z - #define svand_n_u64_x(pg, op1, op2) simde_svand_n_u64_x(pg, op1, op2) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svand_n_u64_m(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_u64_m(pg, op1, op2); - #else - return simde_svand_u64_m(pg, op1, simde_svdup_n_u64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_u64_m - #define svand_n_u64_x(pg, op1, op2) simde_svand_n_u64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svand_n_u64_x(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svand_n_u64_x(pg, op1, op2); - #else - return simde_svand_u64_x(pg, op1, simde_svdup_n_u64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svand_n_u64_x - #define svand_n_u64_x(pg, op1, op2) simde_svand_n_u64_x(pg, op1, op2) -#endif - -#if defined(__cplusplus) - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svand_z(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svand_s8_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svand_z(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svand_s16_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svand_z(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svand_s32_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svand_z(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svand_s64_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svand_z(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svand_u8_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svand_z(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svand_u16_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svand_z(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svand_u32_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svand_z(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svand_u64_z(pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svand_m(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svand_s8_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svand_m(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svand_s16_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svand_m(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svand_s32_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svand_m(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svand_s64_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svand_m(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svand_u8_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svand_m(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svand_u16_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svand_m(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svand_u32_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svand_m(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svand_u64_m(pg, op1, op2); } - - 
SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svand_x(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svand_s8_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svand_x(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svand_s16_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svand_x(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svand_s32_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svand_x(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svand_s64_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svand_x(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svand_u8_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svand_x(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svand_u16_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svand_x(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svand_u32_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svand_x(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svand_u64_x(pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svand_z(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svand_n_s8_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svand_z(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svand_n_s16_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svand_z(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svand_n_s32_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svand_z(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svand_n_s64_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svand_z(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svand_n_u8_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svand_z(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svand_n_u16_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svand_z(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svand_n_u32_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svand_z(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svand_n_u64_z(pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svand_m(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svand_n_s8_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svand_m(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svand_n_s16_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svand_m(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svand_n_s32_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svand_m(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svand_n_s64_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svand_m(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svand_n_u8_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svand_m(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svand_n_u16_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES 
simde_svuint32_t simde_svand_m(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svand_n_u32_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svand_m(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svand_n_u64_m(pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svand_x(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svand_n_s8_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svand_x(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svand_n_s16_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svand_x(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svand_n_s32_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svand_x(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svand_n_s64_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svand_x(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svand_n_u8_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svand_x(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svand_n_u16_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svand_x(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svand_n_u32_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svand_x(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svand_n_u64_x(pg, op1, op2); } -#elif defined(SIMDE_GENERIC_) - #define simde_svand_z(pg, op1, op2) \ - (SIMDE_GENERIC_((op2), \ - simde_svint8_t: simde_svand_s8_z, \ - simde_svint16_t: simde_svand_s16_z, \ - simde_svint32_t: simde_svand_s32_z, \ - simde_svint64_t: simde_svand_s64_z, \ - simde_svuint8_t: simde_svand_u8_z, \ - simde_svuint16_t: simde_svand_u16_z, \ - simde_svuint32_t: simde_svand_u32_z, \ - simde_svuint64_t: simde_svand_u64_z, \ - int8_t: simde_svand_n_s8_z, \ - int16_t: simde_svand_n_s16_z, \ - int32_t: simde_svand_n_s32_z, \ - int64_t: simde_svand_n_s64_z, \ - uint8_t: simde_svand_n_u8_z, \ - uint16_t: simde_svand_n_u16_z, \ - uint32_t: simde_svand_n_u32_z, \ - uint64_t: simde_svand_n_u64_z)((pg), (op1), (op2))) - - #define simde_svand_m(pg, op1, op2) \ - (SIMDE_GENERIC_((op2), \ - simde_svint8_t: simde_svand_s8_m, \ - simde_svint16_t: simde_svand_s16_m, \ - simde_svint32_t: simde_svand_s32_m, \ - simde_svint64_t: simde_svand_s64_m, \ - simde_svuint8_t: simde_svand_u8_m, \ - simde_svuint16_t: simde_svand_u16_m, \ - simde_svuint32_t: simde_svand_u32_m, \ - simde_svuint64_t: simde_svand_u64_m, \ - int8_t: simde_svand_n_s8_m, \ - int16_t: simde_svand_n_s16_m, \ - int32_t: simde_svand_n_s32_m, \ - int64_t: simde_svand_n_s64_m, \ - uint8_t: simde_svand_n_u8_m, \ - uint16_t: simde_svand_n_u16_m, \ - uint32_t: simde_svand_n_u32_m, \ - uint64_t: simde_svand_n_u64_m)((pg), (op1), (op2))) - - #define simde_svand_x(pg, op1, op2) \ - (SIMDE_GENERIC_((op2), \ - simde_svint8_t: simde_svand_s8_x, \ - simde_svint16_t: simde_svand_s16_x, \ - simde_svint32_t: simde_svand_s32_x, \ - simde_svint64_t: simde_svand_s64_x, \ - simde_svuint8_t: simde_svand_u8_x, \ - simde_svuint16_t: simde_svand_u16_x, \ - simde_svuint32_t: simde_svand_u32_x, \ - simde_svuint64_t: simde_svand_u64_x, \ - int8_t: simde_svand_n_s8_x, \ - int16_t: simde_svand_n_s16_x, \ - int32_t: simde_svand_n_s32_x, \ - int64_t: simde_svand_n_s64_x, \ - uint8_t: simde_svand_n_u8_x, \ - uint16_t: simde_svand_n_u16_x, \ - uint32_t: 
simde_svand_n_u32_x, \ - uint64_t: simde_svand_n_u64_x)((pg), (op1), (op2))) -#endif -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef svand_x - #undef svand_z - #undef svand_m - #define svand_x(pg, op1, op2) simde_svand_x((pg), (op1), (op2)) - #define svand_z(pg, op1, op2) simde_svand_z((pg), (op1), (op2)) - #define svand_m(pg, op1, op2) simde_svand_m((pg), (op1), (op2)) -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_SVE_AND_H */ -/* :: End simde/arm/sve/and.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/cmplt.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_CMPLT_H) -#define SIMDE_ARM_SVE_CMPLT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svcmplt_s8(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcmplt_s8(pg, op1, op2); - #else - simde_svbool_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask64(_mm512_mask_cmplt_epi8_mask(simde_svbool_to_mmask64(pg), op1.m512i, op2.m512i)); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask32(_mm256_mask_cmplt_epi8_mask(simde_svbool_to_mmask32(pg), op1.m256i[0], op2.m256i[0])); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon_i8 = vandq_s8(pg.neon_i8, vreinterpretq_s8_u8(vcltq_s8(op1.neon, op2.neon))); - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_and_si128(pg.m128i[i], _mm_cmplt_epi8(op1.m128i[i], op2.m128i[i])); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec_b8 = vec_and(pg.altivec_b8, vec_cmplt(op1.altivec, op2.altivec)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec_b8 = pg.altivec_b8 & vec_cmplt(op1.altivec, op2.altivec); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(pg.v128, wasm_i8x16_lt(op1.v128, op2.v128)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values_i8 = pg.values_i8 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_i8), op1.values < op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_i8) / sizeof(r.values_i8[0])) ; i++) { - r.values_i8[i] = pg.values_i8[i] & ((op1.values[i] < op2.values[i]) ? 
~INT8_C(0) : INT8_C(0)); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcmplt_s8 - #define svcmplt_s8(pg, op1, op2) simde_svcmplt_s8(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svcmplt_s16(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcmplt_s16(pg, op1, op2); - #else - simde_svbool_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask32(_mm512_mask_cmplt_epi16_mask(simde_svbool_to_mmask32(pg), op1.m512i, op2.m512i)); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask16(_mm256_mask_cmplt_epi16_mask(simde_svbool_to_mmask16(pg), op1.m256i[0], op2.m256i[0])); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon_i16 = vandq_s16(pg.neon_i16, vreinterpretq_s16_u16(vcltq_s16(op1.neon, op2.neon))); - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_and_si128(pg.m128i[i], _mm_cmplt_epi16(op1.m128i[i], op2.m128i[i])); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec_b16 = vec_and(pg.altivec_b16, vec_cmplt(op1.altivec, op2.altivec)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec_b16 = pg.altivec_b16 & vec_cmplt(op1.altivec, op2.altivec); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(pg.v128, wasm_i16x8_lt(op1.v128, op2.v128)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values_i16 = pg.values_i16 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_i16), op1.values < op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_i16) / sizeof(r.values_i16[0])) ; i++) { - r.values_i16[i] = pg.values_i16[i] & ((op1.values[i] < op2.values[i]) ? 
~INT16_C(0) : INT16_C(0)); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcmplt_s16 - #define svcmplt_s16(pg, op1, op2) simde_svcmplt_s16(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svcmplt_s32(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcmplt_s32(pg, op1, op2); - #else - simde_svbool_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask16(_mm512_mask_cmplt_epi32_mask(simde_svbool_to_mmask16(pg), op1.m512i, op2.m512i)); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask8(_mm256_mask_cmplt_epi32_mask(simde_svbool_to_mmask8(pg), op1.m256i[0], op2.m256i[0])); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon_i32 = vandq_s32(pg.neon_i32, vreinterpretq_s32_u32(vcltq_s32(op1.neon, op2.neon))); - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_and_si128(pg.m128i[i], _mm_cmplt_epi32(op1.m128i[i], op2.m128i[i])); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec_b32 = vec_and(pg.altivec_b32, vec_cmplt(op1.altivec, op2.altivec)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec_b32 = pg.altivec_b32 & vec_cmplt(op1.altivec, op2.altivec); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(pg.v128, wasm_i32x4_lt(op1.v128, op2.v128)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values_i32 = pg.values_i32 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_i32), op1.values < op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_i32) / sizeof(r.values_i32[0])) ; i++) { - r.values_i32[i] = pg.values_i32[i] & ((op1.values[i] < op2.values[i]) ? 
~INT32_C(0) : INT32_C(0)); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcmplt_s32 - #define svcmplt_s32(pg, op1, op2) simde_svcmplt_s32(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svcmplt_s64(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcmplt_s64(pg, op1, op2); - #else - simde_svbool_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask8(_mm512_mask_cmplt_epi64_mask(simde_svbool_to_mmask8(pg), op1.m512i, op2.m512i)); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask4(_mm256_mask_cmplt_epi64_mask(simde_svbool_to_mmask4(pg), op1.m256i[0], op2.m256i[0])); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r.neon_i64 = vandq_s64(pg.neon_i64, vreinterpretq_s64_u64(vcltq_s64(op1.neon, op2.neon))); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r.altivec_b64 = vec_and(pg.altivec_b64, vec_cmplt(op1.altivec, op2.altivec)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec_b64 = pg.altivec_b64 & vec_cmplt(op1.altivec, op2.altivec); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_WASM_TODO) - r.v128 = wasm_v128_and(pg.v128, wasm_i64x2_lt(op1.v128, op2.v128)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values_i64 = pg.values_i64 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_i64), op1.values < op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_i64) / sizeof(r.values_i64[0])) ; i++) { - r.values_i64[i] = pg.values_i64[i] & ((op1.values[i] < op2.values[i]) ? 
~INT64_C(0) : INT64_C(0)); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcmplt_s64 - #define svcmplt_s64(pg, op1, op2) simde_svcmplt_s64(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svcmplt_u8(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcmplt_u8(pg, op1, op2); - #else - simde_svbool_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask64(_mm512_mask_cmplt_epu8_mask(simde_svbool_to_mmask64(pg), op1.m512i, op2.m512i)); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask32(_mm256_mask_cmplt_epu8_mask(simde_svbool_to_mmask32(pg), op1.m256i[0], op2.m256i[0])); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon_u8 = vandq_u8(pg.neon_u8, vcltq_u8(op1.neon, op2.neon)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec_b8 = vec_and(pg.altivec_b8, vec_cmplt(op1.altivec, op2.altivec)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec_b8 = pg.altivec_b8 & vec_cmplt(op1.altivec, op2.altivec); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(pg.v128, wasm_u8x16_lt(op1.v128, op2.v128)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values_u8 = pg.values_u8 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_u8), op1.values < op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_u8) / sizeof(r.values_u8[0])) ; i++) { - r.values_u8[i] = pg.values_u8[i] & ((op1.values[i] < op2.values[i]) ? 
~UINT8_C(0) : UINT8_C(0)); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcmplt_u8 - #define svcmplt_u8(pg, op1, op2) simde_svcmplt_u8(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svcmplt_u16(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcmplt_u16(pg, op1, op2); - #else - simde_svbool_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask32(_mm512_mask_cmplt_epu16_mask(simde_svbool_to_mmask32(pg), op1.m512i, op2.m512i)); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask16(_mm256_mask_cmplt_epu16_mask(simde_svbool_to_mmask16(pg), op1.m256i[0], op2.m256i[0])); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon_u16 = vandq_u16(pg.neon_u16, vcltq_u16(op1.neon, op2.neon)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec_b16 = vec_and(pg.altivec_b16, vec_cmplt(op1.altivec, op2.altivec)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec_b16 = pg.altivec_b16 & vec_cmplt(op1.altivec, op2.altivec); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(pg.v128, wasm_u16x8_lt(op1.v128, op2.v128)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values_u16 = pg.values_u16 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_u16), op1.values < op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_u16) / sizeof(r.values_u16[0])) ; i++) { - r.values_u16[i] = pg.values_u16[i] & ((op1.values[i] < op2.values[i]) ? 
~UINT16_C(0) : UINT16_C(0)); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcmplt_u16 - #define svcmplt_u16(pg, op1, op2) simde_svcmplt_u16(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svcmplt_u32(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcmplt_u32(pg, op1, op2); - #else - simde_svbool_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask16(_mm512_mask_cmplt_epu32_mask(simde_svbool_to_mmask16(pg), op1.m512i, op2.m512i)); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask8(_mm256_mask_cmplt_epu32_mask(simde_svbool_to_mmask8(pg), op1.m256i[0], op2.m256i[0])); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon_u32 = vandq_u32(pg.neon_u32, vcltq_u32(op1.neon, op2.neon)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec_b32 = vec_and(pg.altivec_b32, vec_cmplt(op1.altivec, op2.altivec)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec_b32 = pg.altivec_b32 & vec_cmplt(op1.altivec, op2.altivec); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(pg.v128, wasm_u32x4_lt(op1.v128, op2.v128)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values_u32 = pg.values_u32 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_u32), op1.values < op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_u32) / sizeof(r.values_u32[0])) ; i++) { - r.values_u32[i] = pg.values_u32[i] & ((op1.values[i] < op2.values[i]) ? 
~UINT32_C(0) : UINT32_C(0)); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcmplt_u32 - #define svcmplt_u32(pg, op1, op2) simde_svcmplt_u32(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svcmplt_u64(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcmplt_u64(pg, op1, op2); - #else - simde_svbool_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask8(_mm512_mask_cmplt_epu64_mask(simde_svbool_to_mmask8(pg), op1.m512i, op2.m512i)); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask4(_mm256_mask_cmplt_epu64_mask(simde_svbool_to_mmask4(pg), op1.m256i[0], op2.m256i[0])); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r.neon_u64 = vandq_u64(pg.neon_u64, vcltq_u64(op1.neon, op2.neon)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r.altivec_b64 = vec_and(pg.altivec_b64, vec_cmplt(op1.altivec, op2.altivec)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec_b64 = pg.altivec_b64 & vec_cmplt(op1.altivec, op2.altivec); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_WASM_TODO) - r.v128 = wasm_v128_and(pg.v128, wasm_u64x2_lt(op1.v128, op2.v128)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values_u64 = pg.values_u64 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_u64), op1.values < op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_u64) / sizeof(r.values_u64[0])) ; i++) { - r.values_u64[i] = pg.values_u64[i] & ((op1.values[i] < op2.values[i]) ? 
~UINT64_C(0) : UINT64_C(0)); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcmplt_u64 - #define svcmplt_u64(pg, op1, op2) simde_svcmplt_u64(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svcmplt_f32(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcmplt_f32(pg, op1, op2); - #else - simde_svbool_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask16(_mm512_mask_cmp_ps_mask(simde_svbool_to_mmask16(pg), op1.m512, op2.m512, _CMP_LT_OQ)); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask8(_mm256_mask_cmp_ps_mask(simde_svbool_to_mmask8(pg), op1.m256[0], op2.m256[0], _CMP_LT_OQ)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon_u32 = vandq_u32(pg.neon_u32, vcltq_f32(op1.neon, op2.neon)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(pg.m128i[i]), _mm_cmplt_ps(op1.m128[i], op2.m128[i]))); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec_b32 = vec_and(pg.altivec_b32, vec_cmplt(op1.altivec, op2.altivec)); - #elif defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r.altivec_b32 = pg.altivec_b32 & vec_cmplt(op1.altivec, op2.altivec); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_v128_and(pg.v128, wasm_f32x4_lt(op1.v128, op2.v128)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values_i32 = pg.values_i32 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_i32), op1.values < op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_i32) / sizeof(r.values_i32[0])) ; i++) { - r.values_i32[i] = pg.values_i32[i] & ((op1.values[i] < op2.values[i]) ? 
~INT32_C(0) : INT32_C(0)); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcmplt_f32 - #define svcmplt_f32(pg, op1, op2) simde_svcmplt_f32(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svbool_t -simde_svcmplt_f64(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svcmplt_f64(pg, op1, op2); - #else - simde_svbool_t r; - - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask8(_mm512_mask_cmp_pd_mask(simde_svbool_to_mmask8(pg), op1.m512d, op2.m512d, _CMP_LT_OQ)); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - r = simde_svbool_from_mmask4(_mm256_mask_cmp_pd_mask(simde_svbool_to_mmask4(pg), op1.m256d[0], op2.m256d[0], _CMP_LT_OQ)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r.neon_u64 = vandq_u64(pg.neon_u64, vcltq_f64(op1.neon, op2.neon)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_castpd_si128(_mm_and_pd(_mm_castsi128_pd(pg.m128i[i]), _mm_cmplt_pd(op1.m128d[i], op2.m128d[i]))); - } - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec_b64 = pg.altivec_b64 & vec_cmplt(op1.altivec, op2.altivec); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_WASM_TODO) - r.v128 = wasm_v128_and(pg.v128, wasm_f64x2_lt(op1.v128, op2.v128)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values_i64 = pg.values_i64 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_i64), op1.values < op2.values); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_i64) / sizeof(r.values_i64[0])) ; i++) { - r.values_i64[i] = pg.values_i64[i] & ((op1.values[i] < op2.values[i]) ? 
~INT64_C(0) : INT64_C(0)); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svcmplt_f64 - #define svcmplt_f64(pg, op1, op2) simde_svcmplt_f64(pg, op1, op2) -#endif - -#if defined(__cplusplus) - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svcmplt_s8(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svcmplt_s16(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svcmplt_s32(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svcmplt_s64(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svcmplt_u8(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svcmplt_u16(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svcmplt_u32(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svcmplt_u64(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svcmplt_f32(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svcmplt_f64(pg, op1, op2); } - - #if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svint8_t op1, svint8_t op2) { return svcmplt_s8(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svint16_t op1, svint16_t op2) { return svcmplt_s16(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svint32_t op1, svint32_t op2) { return svcmplt_s32(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svint64_t op1, svint64_t op2) { return svcmplt_s64(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svcmplt_u8(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svcmplt_u16(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svcmplt_u32(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svcmplt_u64(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svcmplt_f32(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svcmplt_f64(pg, op1, op2); } - #endif -#elif defined(SIMDE_GENERIC_) - #define simde_svcmplt(pg, op1, op2) \ - (SIMDE_GENERIC_((op1), \ - simde_svint8_t: simde_svcmplt_s8)(pg, op1, op2), \ - simde_svint16_t: simde_svcmplt_s16)(pg, op1, op2), \ - simde_svint32_t: simde_svcmplt_s32)(pg, op1, op2), \ - simde_svint64_t: simde_svcmplt_s64)(pg, op1, op2), \ - simde_svuint8_t: 
simde_svcmplt_u8)(pg, op1, op2), \ - simde_svuint16_t: simde_svcmplt_u16)(pg, op1, op2), \ - simde_svuint32_t: simde_svcmplt_u32)(pg, op1, op2), \ - simde_svuint64_t: simde_svcmplt_u64)(pg, op1, op2), \ - simde_svint32_t: simde_svcmplt_f32)(pg, op1, op2), \ - simde_svint64_t: simde_svcmplt_f64)(pg, op1, op2)) - - #if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #define svcmplt(pg, op1, op2) \ - (SIMDE_GENERIC_((op1), \ - svint8_t: svcmplt_s8)(pg, op1, op2), \ - svint16_t: svcmplt_s16)(pg, op1, op2), \ - svint32_t: svcmplt_s32)(pg, op1, op2), \ - svint64_t: svcmplt_s64)(pg, op1, op2), \ - svuint8_t: svcmplt_u8)(pg, op1, op2), \ - svuint16_t: svcmplt_u16)(pg, op1, op2), \ - svuint32_t: svcmplt_u32)(pg, op1, op2), \ - svuint64_t: svcmplt_u64)(pg, op1, op2), \ - svint32_t: svcmplt_f32)(pg, op1, op2), \ - svint64_t: svcmplt_f64)(pg, op1, op2)) - #endif -#endif -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef svcmplt - #define svcmplt(pg, op1, op2) simde_svcmplt((pg), (op1), (op2)) -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_SVE_CMPLT_H */ -/* :: End simde/arm/sve/cmplt.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/qadd.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_QADD_H) -#define SIMDE_ARM_SVE_QADD_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svqadd_s8(simde_svint8_t op1, simde_svint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_s8(op1, op2); - #else - simde_svint8_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vqaddq_s8(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_adds_epi8(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_adds_epi8(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_adds_epi8(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_adds(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = - vec_packs( - vec_unpackh(op1.altivec) + vec_unpackh(op2.altivec), - vec_unpackl(op1.altivec) + vec_unpackl(op2.altivec) - ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i8x16_add_sat(op1.v128, op2.v128); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = simde_math_adds_i8(op1.values[i], op2.values[i]); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_s8 - #define svqadd_s8(op1, op2) simde_svqadd_s8(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svqadd_n_s8(simde_svint8_t op1, int8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_n_s8(op1, op2); - #else - return simde_svqadd_s8(op1, simde_svdup_n_s8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_n_s8 - #define svqadd_n_s8(op1, op2) simde_svqadd_n_s8(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svqadd_s16(simde_svint16_t op1, simde_svint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_s16(op1, op2); - #else - simde_svint16_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vqaddq_s16(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_adds_epi16(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_adds_epi16(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_adds_epi16(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_adds(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = - vec_packs( - vec_unpackh(op1.altivec) + vec_unpackh(op2.altivec), - vec_unpackl(op1.altivec) + vec_unpackl(op2.altivec) - ); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i16x8_add_sat(op1.v128, op2.v128); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = simde_math_adds_i16(op1.values[i], op2.values[i]); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_s16 - #define svqadd_s16(op1, op2) simde_svqadd_s16(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svqadd_n_s16(simde_svint16_t op1, int16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_n_s16(op1, op2); - #else - return simde_svqadd_s16(op1, simde_svdup_n_s16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_n_s16 - #define svqadd_n_s16(op1, op2) simde_svqadd_n_s16(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svqadd_s32(simde_svint32_t op1, simde_svint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_s32(op1, op2); - #else - simde_svint32_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vqaddq_s32(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm512_cvtsepi64_epi32(_mm512_add_epi64(_mm512_cvtepi32_epi64(op1.m256i[i]), _mm512_cvtepi32_epi64(op2.m256i[i]))); - } - #elif defined(SIMDE_X86_AVX512VL_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm256_cvtsepi64_epi32(_mm256_add_epi64(_mm256_cvtepi32_epi64(op1.m128i[i]), _mm256_cvtepi32_epi64(op2.m128i[i]))); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_adds(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = - vec_packs( - vec_unpackh(op1.altivec) + vec_unpackh(op2.altivec), - vec_unpackl(op1.altivec) + vec_unpackl(op2.altivec) - ); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = simde_math_adds_i32(op1.values[i], op2.values[i]); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_s32 - #define svqadd_s32(op1, op2) simde_svqadd_s32(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svqadd_n_s32(simde_svint32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_n_s32(op1, op2); - #else - return simde_svqadd_s32(op1, simde_svdup_n_s32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_n_s32 - #define svqadd_n_s32(op1, op2) simde_svqadd_n_s32(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svqadd_s64(simde_svint64_t op1, simde_svint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_s64(op1, op2); - #else - simde_svint64_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vqaddq_s64(op1.neon, op2.neon); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = simde_math_adds_i64(op1.values[i], op2.values[i]); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_s64 - #define svqadd_s64(op1, op2) simde_svqadd_s64(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svqadd_n_s64(simde_svint64_t op1, int64_t op2) { - #if 
defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_n_s64(op1, op2); - #else - return simde_svqadd_s64(op1, simde_svdup_n_s64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_n_s64 - #define svqadd_n_s64(op1, op2) simde_svqadd_n_s64(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svqadd_u8(simde_svuint8_t op1, simde_svuint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_u8(op1, op2); - #else - simde_svuint8_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vqaddq_u8(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_adds_epu8(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_adds_epu8(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_adds_epu8(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_adds(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = - vec_packs( - vec_unpackh(op1.altivec) + vec_unpackh(op2.altivec), - vec_unpackl(op1.altivec) + vec_unpackl(op2.altivec) - ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_u8x16_add_sat(op1.v128, op2.v128); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = simde_math_adds_u8(op1.values[i], op2.values[i]); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_u8 - #define svqadd_u8(op1, op2) simde_svqadd_u8(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svqadd_n_u8(simde_svuint8_t op1, uint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_n_u8(op1, op2); - #else - return simde_svqadd_u8(op1, simde_svdup_n_u8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_n_u8 - #define svqadd_n_u8(op1, op2) simde_svqadd_n_u8(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svqadd_u16(simde_svuint16_t op1, simde_svuint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_u16(op1, op2); - #else - simde_svuint16_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vqaddq_u16(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_adds_epu16(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_adds_epu16(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_adds_epu16(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_adds(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = - vec_packs( - vec_unpackh(op1.altivec) + vec_unpackh(op2.altivec), - vec_unpackl(op1.altivec) + vec_unpackl(op2.altivec) - ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_u16x8_add_sat(op1.v128, op2.v128); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; 
i++) { - r.values[i] = simde_math_adds_u16(op1.values[i], op2.values[i]); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_u16 - #define svqadd_u16(op1, op2) simde_svqadd_u16(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svqadd_n_u16(simde_svuint16_t op1, uint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_n_u16(op1, op2); - #else - return simde_svqadd_u16(op1, simde_svdup_n_u16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_n_u16 - #define svqadd_n_u16(op1, op2) simde_svqadd_n_u16(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svqadd_u32(simde_svuint32_t op1, simde_svuint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_u32(op1, op2); - #else - simde_svuint32_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vqaddq_u32(op1.neon, op2.neon); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_adds(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = - vec_packs( - vec_unpackh(op1.altivec) + vec_unpackh(op2.altivec), - vec_unpackl(op1.altivec) + vec_unpackl(op2.altivec) - ); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = simde_math_adds_u32(op1.values[i], op2.values[i]); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_u32 - #define svqadd_u32(op1, op2) simde_svqadd_u32(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svqadd_n_u32(simde_svuint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_n_u32(op1, op2); - #else - return simde_svqadd_u32(op1, simde_svdup_n_u32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_n_u32 - #define svqadd_n_u32(op1, op2) simde_svqadd_n_u32(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svqadd_u64(simde_svuint64_t op1, simde_svuint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_u64(op1, op2); - #else - simde_svuint64_t r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vqaddq_u64(op1.neon, op2.neon); - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = simde_math_adds_u64(op1.values[i], op2.values[i]); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_u64 - #define svqadd_u64(op1, op2) simde_svqadd_u64(op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svqadd_n_u64(simde_svuint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svqadd_n_u64(op1, op2); - #else - return simde_svqadd_u64(op1, simde_svdup_n_u64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svqadd_n_u64 - #define svqadd_n_u64(op1, op2) simde_svqadd_n_u64(op1, op2) -#endif - -#if defined(__cplusplus) - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svqadd( simde_svint8_t op1, simde_svint8_t op2) { return simde_svqadd_s8 (op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svqadd( simde_svint16_t op1, simde_svint16_t op2) { return simde_svqadd_s16 (op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svqadd( simde_svint32_t op1, simde_svint32_t op2) { return simde_svqadd_s32 (op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t 
simde_svqadd( simde_svint64_t op1, simde_svint64_t op2) { return simde_svqadd_s64 (op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svqadd( simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svqadd_u8 (op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svqadd( simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svqadd_u16 (op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svqadd( simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svqadd_u32 (op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svqadd( simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svqadd_u64 (op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svqadd( simde_svint8_t op1, int8_t op2) { return simde_svqadd_n_s8 (op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svqadd( simde_svint16_t op1, int16_t op2) { return simde_svqadd_n_s16(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svqadd( simde_svint32_t op1, int32_t op2) { return simde_svqadd_n_s32(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svqadd( simde_svint64_t op1, int64_t op2) { return simde_svqadd_n_s64(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svqadd( simde_svuint8_t op1, uint8_t op2) { return simde_svqadd_n_u8 (op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svqadd( simde_svuint16_t op1, uint16_t op2) { return simde_svqadd_n_u16(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svqadd( simde_svuint32_t op1, uint32_t op2) { return simde_svqadd_n_u32(op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svqadd( simde_svuint64_t op1, uint64_t op2) { return simde_svqadd_n_u64(op1, op2); } -#elif defined(SIMDE_GENERIC_) - #define simde_svqadd_x(op1, op2) \ - (SIMDE_GENERIC_((op2), \ - simde_svint8_t: simde_svqadd_s8, \ - simde_svint16_t: simde_svqadd_s16, \ - simde_svint32_t: simde_svqadd_s32, \ - simde_svint64_t: simde_svqadd_s64, \ - simde_svuint8_t: simde_svqadd_u8, \ - simde_svuint16_t: simde_svqadd_u16, \ - simde_svuint32_t: simde_svqadd_u32, \ - simde_svuint64_t: simde_svqadd_u64, \ - int8_t: simde_svqadd_n_s8, \ - int16_t: simde_svqadd_n_s16, \ - int32_t: simde_svqadd_n_s32, \ - int64_t: simde_svqadd_n_s64, \ - uint8_t: simde_svqadd_n_u8, \ - uint16_t: simde_svqadd_n_u16, \ - uint32_t: simde_svqadd_n_u32, \ - uint64_t: simde_svqadd_n_u64)((pg), (op1), (op2))) -#endif -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef svqadd - #define svqadd(op1, op2) simde_svqadd((pg), (op1), (op2)) -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* SIMDE_ARM_SVE_QADD_H */ -/* :: End simde/arm/sve/qadd.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/arm/sve/sub.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished 
to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_ARM_SVE_SUB_H) -#define SIMDE_ARM_SVE_SUB_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svsub_s8_x(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_s8_x(pg, op1, op2); - #else - simde_svint8_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vsubq_s8(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_sub_epi8(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_sub_epi8(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_sub_epi8(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_sub_epi8(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_sub(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec - op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i8x16_sub(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values - op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] - op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_s8_x - #define svsub_s8_x(pg, op1, op2) simde_svsub_s8_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svsub_s8_z(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_s8_z(pg, op1, op2); - #else - return simde_x_svsel_s8_z(pg, simde_svsub_s8_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_s8_z - #define svsub_s8_z(pg, op1, op2) simde_svsub_s8_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svsub_s8_m(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_s8_m(pg, op1, op2); - #else - return simde_svsel_s8(pg, 
simde_svsub_s8_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_s8_m - #define svsub_s8_m(pg, op1, op2) simde_svsub_s8_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svsub_n_s8_x(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_s8_x(pg, op1, op2); - #else - return simde_svsub_s8_x(pg, op1, simde_svdup_n_s8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_s8_x - #define svsub_n_s8_x(pg, op1, op2) simde_svsub_n_s8_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svsub_n_s8_z(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_s8_z(pg, op1, op2); - #else - return simde_svsub_s8_z(pg, op1, simde_svdup_n_s8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_s8_z - #define svsub_n_s8_z(pg, op1, op2) simde_svsub_n_s8_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint8_t -simde_svsub_n_s8_m(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_s8_m(pg, op1, op2); - #else - return simde_svsub_s8_m(pg, op1, simde_svdup_n_s8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_s8_m - #define svsub_n_s8_m(pg, op1, op2) simde_svsub_n_s8_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svsub_s16_x(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_s16_x(pg, op1, op2); - #else - simde_svint16_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vsubq_s16(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_sub_epi16(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_sub_epi16(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_sub_epi16(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_sub_epi16(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_sub(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec - op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i16x8_sub(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values - op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] - op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_s16_x - #define svsub_s16_x(pg, op1, op2) simde_svsub_s16_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svsub_s16_z(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_s16_z(pg, op1, op2); - #else - return simde_x_svsel_s16_z(pg, simde_svsub_s16_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - 
#undef simde_svsub_s16_z - #define svsub_s16_z(pg, op1, op2) simde_svsub_s16_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svsub_s16_m(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_s16_m(pg, op1, op2); - #else - return simde_svsel_s16(pg, simde_svsub_s16_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_s16_m - #define svsub_s16_m(pg, op1, op2) simde_svsub_s16_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svsub_n_s16_x(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_s16_x(pg, op1, op2); - #else - return simde_svsub_s16_x(pg, op1, simde_svdup_n_s16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_s16_x - #define svsub_n_s16_x(pg, op1, op2) simde_svsub_n_s16_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svsub_n_s16_z(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_s16_z(pg, op1, op2); - #else - return simde_svsub_s16_z(pg, op1, simde_svdup_n_s16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_s16_z - #define svsub_n_s16_z(pg, op1, op2) simde_svsub_n_s16_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint16_t -simde_svsub_n_s16_m(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_s16_m(pg, op1, op2); - #else - return simde_svsub_s16_m(pg, op1, simde_svdup_n_s16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_s16_m - #define svsub_n_s16_m(pg, op1, op2) simde_svsub_n_s16_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svsub_s32_x(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_s32_x(pg, op1, op2); - #else - simde_svint32_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vsubq_s32(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_sub_epi32(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_sub_epi32(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_sub_epi32(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_sub_epi32(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_sub(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec - op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i32x4_sub(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values - op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] - op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_s32_x - #define svsub_s32_x(pg, op1, op2) 
simde_svsub_s32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svsub_s32_z(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_s32_z(pg, op1, op2); - #else - return simde_x_svsel_s32_z(pg, simde_svsub_s32_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_s32_z - #define svsub_s32_z(pg, op1, op2) simde_svsub_s32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svsub_s32_m(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_s32_m(pg, op1, op2); - #else - return simde_svsel_s32(pg, simde_svsub_s32_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_s32_m - #define svsub_s32_m(pg, op1, op2) simde_svsub_s32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svsub_n_s32_x(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_s32_x(pg, op1, op2); - #else - return simde_svsub_s32_x(pg, op1, simde_svdup_n_s32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_s32_x - #define svsub_n_s32_x(pg, op1, op2) simde_svsub_n_s32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svsub_n_s32_z(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_s32_z(pg, op1, op2); - #else - return simde_svsub_s32_z(pg, op1, simde_svdup_n_s32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_s32_z - #define svsub_n_s32_z(pg, op1, op2) simde_svsub_n_s32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint32_t -simde_svsub_n_s32_m(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_s32_m(pg, op1, op2); - #else - return simde_svsub_s32_m(pg, op1, simde_svdup_n_s32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_s32_m - #define svsub_n_s32_m(pg, op1, op2) simde_svsub_n_s32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svsub_s64_x(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_s64_x(pg, op1, op2); - #else - simde_svint64_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vsubq_s64(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_sub_epi64(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_sub_epi64(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_sub_epi64(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_sub_epi64(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r.altivec = vec_sub(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec - op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i64x2_sub(op1.v128, op2.v128); - #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values - op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] - op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_s64_x - #define svsub_s64_x(pg, op1, op2) simde_svsub_s64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svsub_s64_z(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_s64_z(pg, op1, op2); - #else - return simde_x_svsel_s64_z(pg, simde_svsub_s64_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_s64_z - #define svsub_s64_z(pg, op1, op2) simde_svsub_s64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svsub_s64_m(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_s64_m(pg, op1, op2); - #else - return simde_svsel_s64(pg, simde_svsub_s64_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_s64_m - #define svsub_s64_m(pg, op1, op2) simde_svsub_s64_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svsub_n_s64_x(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_s64_x(pg, op1, op2); - #else - return simde_svsub_s64_x(pg, op1, simde_svdup_n_s64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_s64_x - #define svsub_n_s64_x(pg, op1, op2) simde_svsub_n_s64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svsub_n_s64_z(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_s64_z(pg, op1, op2); - #else - return simde_svsub_s64_z(pg, op1, simde_svdup_n_s64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_s64_z - #define svsub_n_s64_z(pg, op1, op2) simde_svsub_n_s64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svint64_t -simde_svsub_n_s64_m(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_s64_m(pg, op1, op2); - #else - return simde_svsub_s64_m(pg, op1, simde_svdup_n_s64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_s64_m - #define svsub_n_s64_m(pg, op1, op2) simde_svsub_n_s64_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svsub_u8_x(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_u8_x(pg, op1, op2); - #else - simde_svuint8_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vsubq_u8(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_sub_epi8(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_sub_epi8(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_sub_epi8(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / 
sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_sub_epi8(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_sub(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec - op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i8x16_sub(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values - op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] - op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_u8_x - #define svsub_u8_x(pg, op1, op2) simde_svsub_u8_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svsub_u8_z(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_u8_z(pg, op1, op2); - #else - return simde_x_svsel_u8_z(pg, simde_svsub_u8_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_u8_z - #define svsub_u8_z(pg, op1, op2) simde_svsub_u8_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svsub_u8_m(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_u8_m(pg, op1, op2); - #else - return simde_svsel_u8(pg, simde_svsub_u8_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_u8_m - #define svsub_u8_m(pg, op1, op2) simde_svsub_u8_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svsub_n_u8_x(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_u8_x(pg, op1, op2); - #else - return simde_svsub_u8_x(pg, op1, simde_svdup_n_u8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_u8_x - #define svsub_n_u8_x(pg, op1, op2) simde_svsub_n_u8_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svsub_n_u8_z(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_u8_z(pg, op1, op2); - #else - return simde_svsub_u8_z(pg, op1, simde_svdup_n_u8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_u8_z - #define svsub_n_u8_z(pg, op1, op2) simde_svsub_n_u8_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint8_t -simde_svsub_n_u8_m(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_u8_m(pg, op1, op2); - #else - return simde_svsub_u8_m(pg, op1, simde_svdup_n_u8(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_u8_m - #define svsub_n_u8_m(pg, op1, op2) simde_svsub_n_u8_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svsub_u16_x(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_u16_x(pg, op1, op2); - #else - simde_svuint16_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vsubq_u16(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_sub_epi16(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = 
_mm256_sub_epi16(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_sub_epi16(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_sub_epi16(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_sub(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec - op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i16x8_sub(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values - op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] - op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_u16_x - #define svsub_u16_x(pg, op1, op2) simde_svsub_u16_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svsub_u16_z(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_u16_z(pg, op1, op2); - #else - return simde_x_svsel_u16_z(pg, simde_svsub_u16_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_u16_z - #define svsub_u16_z(pg, op1, op2) simde_svsub_u16_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svsub_u16_m(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_u16_m(pg, op1, op2); - #else - return simde_svsel_u16(pg, simde_svsub_u16_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_u16_m - #define svsub_u16_m(pg, op1, op2) simde_svsub_u16_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svsub_n_u16_x(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_u16_x(pg, op1, op2); - #else - return simde_svsub_u16_x(pg, op1, simde_svdup_n_u16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_u16_x - #define svsub_n_u16_x(pg, op1, op2) simde_svsub_n_u16_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svsub_n_u16_z(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_u16_z(pg, op1, op2); - #else - return simde_svsub_u16_z(pg, op1, simde_svdup_n_u16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_u16_z - #define svsub_n_u16_z(pg, op1, op2) simde_svsub_n_u16_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint16_t -simde_svsub_n_u16_m(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_u16_m(pg, op1, op2); - #else - return simde_svsub_u16_m(pg, op1, simde_svdup_n_u16(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_u16_m - #define svsub_n_u16_m(pg, op1, op2) simde_svsub_n_u16_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svsub_u32_x(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_u32_x(pg, 
op1, op2); - #else - simde_svuint32_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vsubq_u32(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_sub_epi32(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_sub_epi32(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_sub_epi32(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_sub_epi32(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_sub(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec - op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i32x4_sub(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values - op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] - op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_u32_x - #define svsub_u32_x(pg, op1, op2) simde_svsub_u32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svsub_u32_z(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_u32_z(pg, op1, op2); - #else - return simde_x_svsel_u32_z(pg, simde_svsub_u32_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_u32_z - #define svsub_u32_z(pg, op1, op2) simde_svsub_u32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svsub_u32_m(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_u32_m(pg, op1, op2); - #else - return simde_svsel_u32(pg, simde_svsub_u32_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_u32_m - #define svsub_u32_m(pg, op1, op2) simde_svsub_u32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svsub_n_u32_x(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_u32_x(pg, op1, op2); - #else - return simde_svsub_u32_x(pg, op1, simde_svdup_n_u32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_u32_x - #define svsub_n_u32_x(pg, op1, op2) simde_svsub_n_u32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svsub_n_u32_z(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_u32_z(pg, op1, op2); - #else - return simde_svsub_u32_z(pg, op1, simde_svdup_n_u32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_u32_z - #define svsub_n_u32_z(pg, op1, op2) simde_svsub_n_u32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint32_t -simde_svsub_n_u32_m(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_u32_m(pg, op1, op2); - #else - return simde_svsub_u32_m(pg, op1, 
simde_svdup_n_u32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_u32_m - #define svsub_n_u32_m(pg, op1, op2) simde_svsub_n_u32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svsub_u64_x(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_u64_x(pg, op1, op2); - #else - simde_svuint64_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vsubq_u64(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512i = _mm512_sub_epi64(op1.m512i, op2.m512i); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256i[0] = _mm256_sub_epi64(op1.m256i[0], op2.m256i[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { - r.m256i[i] = _mm256_sub_epi64(op1.m256i[i], op2.m256i[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { - r.m128i[i] = _mm_sub_epi64(op1.m128i[i], op2.m128i[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r.altivec = vec_sub(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec - op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_i64x2_sub(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values - op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] - op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_u64_x - #define svsub_u64_x(pg, op1, op2) simde_svsub_u64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svsub_u64_z(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_u64_z(pg, op1, op2); - #else - return simde_x_svsel_u64_z(pg, simde_svsub_u64_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_u64_z - #define svsub_u64_z(pg, op1, op2) simde_svsub_u64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svsub_u64_m(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_u64_m(pg, op1, op2); - #else - return simde_svsel_u64(pg, simde_svsub_u64_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_u64_m - #define svsub_u64_m(pg, op1, op2) simde_svsub_u64_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svsub_n_u64_x(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_u64_x(pg, op1, op2); - #else - return simde_svsub_u64_x(pg, op1, simde_svdup_n_u64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_u64_x - #define svsub_n_u64_x(pg, op1, op2) simde_svsub_n_u64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svsub_n_u64_z(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_u64_z(pg, op1, op2); - #else - return simde_svsub_u64_z(pg, op1, simde_svdup_n_u64(op2)); - #endif -} -#if 
defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_u64_z - #define svsub_n_u64_z(pg, op1, op2) simde_svsub_n_u64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svuint64_t -simde_svsub_n_u64_m(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_u64_m(pg, op1, op2); - #else - return simde_svsub_u64_m(pg, op1, simde_svdup_n_u64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_u64_m - #define svsub_n_u64_m(pg, op1, op2) simde_svsub_n_u64_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svsub_f32_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_f32_x(pg, op1, op2); - #else - simde_svfloat32_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r.neon = vsubq_f32(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512 = _mm512_sub_ps(op1.m512, op2.m512); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256[0] = _mm256_sub_ps(op1.m256[0], op2.m256[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256) / sizeof(r.m256[0])) ; i++) { - r.m256[i] = _mm256_sub_ps(op1.m256[i], op2.m256[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128) / sizeof(r.m128[0])) ; i++) { - r.m128[i] = _mm_sub_ps(op1.m128[i], op2.m128[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r.altivec = vec_sub(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec - op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_f32x4_sub(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values - op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] - op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_f32_x - #define svsub_f32_x(pg, op1, op2) simde_svsub_f32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svsub_f32_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_f32_z(pg, op1, op2); - #else - return simde_x_svsel_f32_z(pg, simde_svsub_f32_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_f32_z - #define svsub_f32_z(pg, op1, op2) simde_svsub_f32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svsub_f32_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_f32_m(pg, op1, op2); - #else - return simde_svsel_f32(pg, simde_svsub_f32_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_f32_m - #define svsub_f32_m(pg, op1, op2) simde_svsub_f32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svsub_n_f32_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_f32_x(pg, op1, op2); - #else - return simde_svsub_f32_x(pg, op1, simde_svdup_n_f32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef 
simde_svsub_n_f32_x - #define svsub_n_f32_x(pg, op1, op2) simde_svsub_n_f32_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svsub_n_f32_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_f32_z(pg, op1, op2); - #else - return simde_svsub_f32_z(pg, op1, simde_svdup_n_f32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_f32_z - #define svsub_n_f32_z(pg, op1, op2) simde_svsub_n_f32_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat32_t -simde_svsub_n_f32_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_f32_m(pg, op1, op2); - #else - return simde_svsub_f32_m(pg, op1, simde_svdup_n_f32(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_f32_m - #define svsub_n_f32_m(pg, op1, op2) simde_svsub_n_f32_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svsub_f64_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_f64_x(pg, op1, op2); - #else - simde_svfloat64_t r; - HEDLEY_STATIC_CAST(void, pg); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r.neon = vsubq_f64(op1.neon, op2.neon); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) - r.m512d = _mm512_sub_pd(op1.m512d, op2.m512d); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - r.m256d[0] = _mm256_sub_pd(op1.m256d[0], op2.m256d[0]); - #elif defined(SIMDE_X86_AVX2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256d) / sizeof(r.m256d[0])) ; i++) { - r.m256d[i] = _mm256_sub_pd(op1.m256d[i], op2.m256d[i]); - } - #elif defined(SIMDE_X86_SSE2_NATIVE) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128d) / sizeof(r.m128d[0])) ; i++) { - r.m128d[i] = _mm_sub_pd(op1.m128d[i], op2.m128d[i]); - } - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r.altivec = vec_sub(op1.altivec, op2.altivec); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r.altivec = op1.altivec - op2.altivec; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r.v128 = wasm_f64x2_sub(op1.v128, op2.v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r.values = op1.values - op2.values; - #else - SIMDE_VECTORIZE - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { - r.values[i] = op1.values[i] - op2.values[i]; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_f64_x - #define svsub_f64_x(pg, op1, op2) simde_svsub_f64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svsub_f64_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_f64_z(pg, op1, op2); - #else - return simde_x_svsel_f64_z(pg, simde_svsub_f64_x(pg, op1, op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_f64_z - #define svsub_f64_z(pg, op1, op2) simde_svsub_f64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svsub_f64_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_f64_m(pg, op1, op2); - #else - return simde_svsel_f64(pg, simde_svsub_f64_x(pg, op1, op2), op1); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_f64_m - #define 
svsub_f64_m(pg, op1, op2) simde_svsub_f64_m(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svsub_n_f64_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_f64_x(pg, op1, op2); - #else - return simde_svsub_f64_x(pg, op1, simde_svdup_n_f64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_f64_x - #define svsub_n_f64_x(pg, op1, op2) simde_svsub_n_f64_x(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svsub_n_f64_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_f64_z(pg, op1, op2); - #else - return simde_svsub_f64_z(pg, op1, simde_svdup_n_f64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_f64_z - #define svsub_n_f64_z(pg, op1, op2) simde_svsub_n_f64_z(pg, op1, op2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_svfloat64_t -simde_svsub_n_f64_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { - #if defined(SIMDE_ARM_SVE_NATIVE) - return svsub_n_f64_m(pg, op1, op2); - #else - return simde_svsub_f64_m(pg, op1, simde_svdup_n_f64(op2)); - #endif -} -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef simde_svsub_n_f64_m - #define svsub_n_f64_m(pg, op1, op2) simde_svsub_n_f64_m(pg, op1, op2) -#endif - -#if defined(__cplusplus) - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsub_x(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svsub_s8_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsub_x(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svsub_s16_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsub_x(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svsub_s32_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsub_x(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svsub_s64_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svsub_x(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svsub_u8_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsub_x(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svsub_u16_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsub_x(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svsub_u32_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsub_x(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svsub_u64_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsub_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svsub_f32_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsub_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svsub_f64_x (pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsub_z(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svsub_s8_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsub_z(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svsub_s16_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsub_z(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return 
simde_svsub_s32_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsub_z(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svsub_s64_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svsub_z(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svsub_u8_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsub_z(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svsub_u16_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsub_z(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svsub_u32_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsub_z(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svsub_u64_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsub_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svsub_f32_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsub_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svsub_f64_z (pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsub_m(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svsub_s8_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsub_m(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svsub_s16_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsub_m(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svsub_s32_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsub_m(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svsub_s64_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svsub_m(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svsub_u8_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsub_m(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svsub_u16_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsub_m(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svsub_u32_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsub_m(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svsub_u64_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsub_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svsub_f32_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsub_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svsub_f64_m (pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsub_x(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svsub_n_s8_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsub_x(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svsub_n_s16_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsub_x(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svsub_n_s32_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsub_x(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svsub_n_s64_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t 
simde_svsub_x(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svsub_n_u8_x (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsub_x(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svsub_n_u16_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsub_x(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svsub_n_u32_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsub_x(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svsub_n_u64_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsub_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { return simde_svsub_n_f32_x(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsub_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { return simde_svsub_n_f64_x(pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsub_z(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svsub_n_s8_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsub_z(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svsub_n_s16_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsub_z(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svsub_n_s32_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsub_z(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svsub_n_s64_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svsub_z(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svsub_n_u8_z (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsub_z(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svsub_n_u16_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsub_z(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svsub_n_u32_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsub_z(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svsub_n_u64_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsub_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { return simde_svsub_n_f32_z(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsub_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { return simde_svsub_n_f64_z(pg, op1, op2); } - - SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsub_m(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svsub_n_s8_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsub_m(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svsub_n_s16_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsub_m(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svsub_n_s32_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsub_m(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svsub_n_s64_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svsub_m(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svsub_n_u8_m (pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsub_m(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svsub_n_u16_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsub_m(simde_svbool_t pg, 
simde_svuint32_t op1, uint32_t op2) { return simde_svsub_n_u32_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsub_m(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svsub_n_u64_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsub_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { return simde_svsub_n_f32_m(pg, op1, op2); } - SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsub_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { return simde_svsub_n_f64_m(pg, op1, op2); } -#elif defined(SIMDE_GENERIC_) - #define simde_svsub_x(pg, op1, op2) \ - (SIMDE_GENERIC_((op2), \ - simde_svint8_t: simde_svsub_s8_x, \ - simde_svint16_t: simde_svsub_s16_x, \ - simde_svint32_t: simde_svsub_s32_x, \ - simde_svint64_t: simde_svsub_s64_x, \ - simde_svuint8_t: simde_svsub_u8_x, \ - simde_svuint16_t: simde_svsub_u16_x, \ - simde_svuint32_t: simde_svsub_u32_x, \ - simde_svuint64_t: simde_svsub_u64_x, \ - simde_svfloat32_t: simde_svsub_f32_x, \ - simde_svfloat64_t: simde_svsub_f64_x, \ - int8_t: simde_svsub_n_s8_x, \ - int16_t: simde_svsub_n_s16_x, \ - int32_t: simde_svsub_n_s32_x, \ - int64_t: simde_svsub_n_s64_x, \ - uint8_t: simde_svsub_n_u8_x, \ - uint16_t: simde_svsub_n_u16_x, \ - uint32_t: simde_svsub_n_u32_x, \ - uint64_t: simde_svsub_n_u64_x, \ - simde_float32: simde_svsub_n_f32_x, \ - simde_float64: simde_svsub_n_f64_x)((pg), (op1), (op2))) - - #define simde_svsub_z(pg, op1, op2) \ - (SIMDE_GENERIC_((op2), \ - simde_svint8_t: simde_svsub_s8_z, \ - simde_svint16_t: simde_svsub_s16_z, \ - simde_svint32_t: simde_svsub_s32_z, \ - simde_svint64_t: simde_svsub_s64_z, \ - simde_svuint8_t: simde_svsub_u8_z, \ - simde_svuint16_t: simde_svsub_u16_z, \ - simde_svuint32_t: simde_svsub_u32_z, \ - simde_svuint64_t: simde_svsub_u64_z, \ - simde_svfloat32_t: simde_svsub_f32_z, \ - simde_svfloat64_t: simde_svsub_f64_z, \ - int8_t: simde_svsub_n_s8_z, \ - int16_t: simde_svsub_n_s16_z, \ - int32_t: simde_svsub_n_s32_z, \ - int64_t: simde_svsub_n_s64_z, \ - uint8_t: simde_svsub_n_u8_z, \ - uint16_t: simde_svsub_n_u16_z, \ - uint32_t: simde_svsub_n_u32_z, \ - uint64_t: simde_svsub_n_u64_z, \ - simde_float32: simde_svsub_n_f32_z, \ - simde_float64: simde_svsub_n_f64_z)((pg), (op1), (op2))) - - #define simde_svsub_m(pg, op1, op2) \ - (SIMDE_GENERIC_((op2), \ - simde_svint8_t: simde_svsub_s8_m, \ - simde_svint16_t: simde_svsub_s16_m, \ - simde_svint32_t: simde_svsub_s32_m, \ - simde_svint64_t: simde_svsub_s64_m, \ - simde_svuint8_t: simde_svsub_u8_m, \ - simde_svuint16_t: simde_svsub_u16_m, \ - simde_svuint32_t: simde_svsub_u32_m, \ - simde_svuint64_t: simde_svsub_u64_m, \ - simde_svfloat32_t: simde_svsub_f32_m, \ - simde_svfloat64_t: simde_svsub_f64_m, \ - int8_t: simde_svsub_n_s8_m, \ - int16_t: simde_svsub_n_s16_m, \ - int32_t: simde_svsub_n_s32_m, \ - int64_t: simde_svsub_n_s64_m, \ - uint8_t: simde_svsub_n_u8_m, \ - uint16_t: simde_svsub_n_u16_m, \ - uint32_t: simde_svsub_n_u32_m, \ - uint64_t: simde_svsub_n_u64_m, \ - simde_float32: simde_svsub_n_f32_m, \ - simde_float64: simde_svsub_n_f64_m)((pg), (op1), (op2))) -#endif -#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) - #undef svsub_x - #undef svsub_z - #undef svsub_m - #undef svsub_n_x - #undef svsub_n_z - #undef svsub_n_m - #define svsub_x(pg, op1, op2) simde_svsub_x((pg), (op1), (op2)) - #define svsub_z(pg, op1, op2) simde_svsub_z((pg), (op1), (op2)) - #define svsub_m(pg, op1, op2) simde_svsub_m((pg), (op1), (op2)) - #define svsub_n_x(pg, op1, op2) 
simde_svsub_n_x((pg), (op1), (op2))
- #define svsub_n_z(pg, op1, op2) simde_svsub_n_z((pg), (op1), (op2))
- #define svsub_n_m(pg, op1, op2) simde_svsub_n_m((pg), (op1), (op2))
-#endif
-
-HEDLEY_DIAGNOSTIC_POP
-
-#endif /* SIMDE_ARM_SVE_SUB_H */
-/* :: End simde/arm/sve/sub.h :: */
-/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
-/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */
-
-#endif /* SIMDE_ARM_SVE_H */
-/* :: End simde/arm/sve.h :: */
diff --git a/src/simde/x86/aes.h b/src/simde/x86/aes.h
deleted file mode 100644
index c97f14bcb..000000000
--- a/src/simde/x86/aes.h
+++ /dev/null
@@ -1,24504 +0,0 @@
-/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
-/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */
-/* :: Begin simde/x86/aes.h :: */
-/* MIT License
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#if !defined(SIMDE_X86_AES_H)
-#define SIMDE_X86_AES_H
-
-/*
- * Advanced Encryption Standard
- * @author Dani Huertas
- * @email huertas.dani@gmail.com
- *
- * Based on the document FIPS PUB 197
- */
-
-/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
-/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */
-/* :: Begin simde/x86/sse2.h :: */
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- * 2017-2020 Evan Nemerson
- * 2015-2017 John W. Ratcliff
- * 2015 Brandon Rowlett
- * 2015 Ken Fast
- * 2017 Hasindu Gamaarachchi
- * 2018 Jeff Daily
- */
-
-#if !defined(SIMDE_X86_SSE2_H)
-#define SIMDE_X86_SSE2_H
-
-/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
-/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */
-/* :: Begin simde/x86/sse.h :: */
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- * 2017-2020 Evan Nemerson
- * 2015-2017 John W. Ratcliff
- * 2015 Brandon Rowlett
- * 2015 Ken Fast
- */
-
-#if !defined(SIMDE_X86_SSE_H)
-#define SIMDE_X86_SSE_H
-
-/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
-/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */
-/* :: Begin simde/x86/mmx.h :: */
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- * 2017-2020 Evan Nemerson
- */
-
-#if !defined(SIMDE_X86_MMX_H)
-#define SIMDE_X86_MMX_H
-
-/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
-/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */
-/* :: Begin simde/simde-common.h :: */
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- * 2017-2020 Evan Nemerson
- * 2023 Yi-Yen Chung (Copyright owned by Andes Technology)
- */
-
-#if !defined(SIMDE_COMMON_H)
-#define SIMDE_COMMON_H
-
-/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
-/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */
-/* :: Begin simde/hedley.h :: */
-/* Hedley - https://nemequ.github.io/hedley
- * Created by Evan Nemerson
- *
- * To the extent possible under law, the author(s) have dedicated all
- * copyright and related and neighboring rights to this software to
- * the public domain worldwide. This software is distributed without
- * any warranty.
- *
- * For details, see <http://creativecommons.org/publicdomain/zero/1.0/>.
- * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) -#if defined(HEDLEY_VERSION) -# undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 16 - -#if defined(HEDLEY_STRINGIFY_EX) -# undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -# undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -# undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(HEDLEY_CONCAT) -# undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) - -#if defined(HEDLEY_CONCAT3_EX) -# undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(HEDLEY_CONCAT3) -# undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(HEDLEY_VERSION_ENCODE) -# undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -# undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -# undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -# undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -# undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -# undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -# undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -# undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(HEDLEY_INTEL_VERSION) -# undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -# undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -# undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) -# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -# undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -# undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -# undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -# undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -# undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_ARM_VERSION) -# undef HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(HEDLEY_ARM_VERSION_CHECK) -# undef HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(HEDLEY_ARM_VERSION) -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IBM_VERSION) -# undef HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(HEDLEY_IBM_VERSION_CHECK) -# undef HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(HEDLEY_IBM_VERSION) -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_VERSION) -# undef HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -# if (__TI_COMPILER_VERSION__ >= 16000000) -# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -# endif -#endif - -#if defined(HEDLEY_TI_VERSION_CHECK) -# undef HEDLEY_TI_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_VERSION) -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION) -# undef HEDLEY_TI_CL2000_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) -# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) -# undef HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL2000_VERSION) -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION) -# undef HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) -# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
-# undef HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL430_VERSION) -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION) -# undef HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) -# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) -# undef HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_ARMCL_VERSION) -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION) -# undef HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) -# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) -# undef HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL6X_VERSION) -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION) -# undef HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) -# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) -# undef HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL7X_VERSION) -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION) -# undef HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) -# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) -# undef HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CLPRU_VERSION) -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_CRAY_VERSION) -# undef HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) -# if defined(_RELEASE_PATCHLEVEL) -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) -# else -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) -# endif -#endif - -#if defined(HEDLEY_CRAY_VERSION_CHECK) -# undef HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(HEDLEY_CRAY_VERSION) -# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IAR_VERSION) -# undef HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) -# if __VER__ > 1000 -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) -# else -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) -# endif -#endif - -#if defined(HEDLEY_IAR_VERSION_CHECK) -# undef HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(HEDLEY_IAR_VERSION) -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TINYC_VERSION) -# undef HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) -# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) -#endif - -#if defined(HEDLEY_TINYC_VERSION_CHECK) -# undef HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_DMC_VERSION) -# undef HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) -# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if defined(HEDLEY_DMC_VERSION_CHECK) -# undef HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(HEDLEY_DMC_VERSION) -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION) -# undef HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) -# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION_CHECK) -# undef HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(HEDLEY_COMPCERT_VERSION) -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PELLES_VERSION) -# undef HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) -# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if defined(HEDLEY_PELLES_VERSION_CHECK) -# undef HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(HEDLEY_PELLES_VERSION) -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION) -# undef HEDLEY_MCST_LCC_VERSION -#endif -#if defined(__LCC__) && defined(__LCC_MINOR__) -# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) -# undef HEDLEY_MCST_LCC_VERSION_CHECK -#endif -#if defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_GCC_VERSION) -# undef HEDLEY_GCC_VERSION 
-#endif -#if \ - defined(HEDLEY_GNUC_VERSION) && \ - !defined(__clang__) && \ - !defined(HEDLEY_INTEL_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_ARM_VERSION) && \ - !defined(HEDLEY_CRAY_VERSION) && \ - !defined(HEDLEY_TI_VERSION) && \ - !defined(HEDLEY_TI_ARMCL_VERSION) && \ - !defined(HEDLEY_TI_CL430_VERSION) && \ - !defined(HEDLEY_TI_CL2000_VERSION) && \ - !defined(HEDLEY_TI_CL6X_VERSION) && \ - !defined(HEDLEY_TI_CL7X_VERSION) && \ - !defined(HEDLEY_TI_CLPRU_VERSION) && \ - !defined(__COMPCERT__) && \ - !defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION -#endif - -#if defined(HEDLEY_GCC_VERSION_CHECK) -# undef HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(HEDLEY_GCC_VERSION) -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_HAS_ATTRIBUTE) -# undef HEDLEY_HAS_ATTRIBUTE -#endif -#if \ - defined(__has_attribute) && \ - ( \ - (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ - ) -# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if \ - defined(__has_cpp_attribute) && \ - defined(__cplusplus) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) -# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#elif \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_BUILTIN) -# undef HEDLEY_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) -#else -# define HEDLEY_HAS_BUILTIN(builtin) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_BUILTIN) -# undef HEDLEY_GNUC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_BUILTIN) -# undef HEDLEY_GCC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_FEATURE) -# undef HEDLEY_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) -#else -# define HEDLEY_HAS_FEATURE(feature) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_FEATURE) -# undef HEDLEY_GNUC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_FEATURE) -# undef HEDLEY_GCC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_EXTENSION) -# undef HEDLEY_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) -#else -# define HEDLEY_HAS_EXTENSION(extension) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_EXTENSION) -# undef HEDLEY_GNUC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_EXTENSION) -# undef HEDLEY_GCC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) -#else -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_WARNING) -# undef HEDLEY_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) -#else -# define HEDLEY_HAS_WARNING(warning) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_WARNING) -# undef HEDLEY_GNUC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_WARNING) -# undef HEDLEY_GCC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - defined(__clang__) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ - HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ - (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) -# define HEDLEY_PRAGMA(value) _Pragma(#value) -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_PRAGMA(value) __pragma(value) -#else -# define HEDLEY_PRAGMA(value) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_PUSH) -# undef HEDLEY_DIAGNOSTIC_PUSH -#endif -#if defined(HEDLEY_DIAGNOSTIC_POP) -# undef HEDLEY_DIAGNOSTIC_POP -#endif -#if defined(__clang__) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) -# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) -#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#else -# 
define HEDLEY_DIAGNOSTIC_PUSH -# define HEDLEY_DIAGNOSTIC_POP -#endif - -/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wc++98-compat") -# if HEDLEY_HAS_WARNING("-Wc++17-extensions") -# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# endif -#endif -#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x -#endif - -#if defined(HEDLEY_CONST_CAST) -# undef HEDLEY_CONST_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) -#elif \ - HEDLEY_HAS_WARNING("-Wcast-qual") || \ - HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_REINTERPRET_CAST) -# undef HEDLEY_REINTERPRET_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) -#else -# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_STATIC_CAST) -# undef HEDLEY_STATIC_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) -#else -# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_CPP_CAST) -# undef HEDLEY_CPP_CAST -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wold-style-cast") -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ - ((T) (expr)) \ - HEDLEY_DIAGNOSTIC_POP -# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("diag_suppress=Pe137") \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) -# endif -#else -# define HEDLEY_CPP_CAST(T, expr) (expr) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) -# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif -#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-attributes") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") -#elif \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif -#if HEDLEY_HAS_WARNING("-Wcast-qual") -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif -#if HEDLEY_HAS_WARNING("-Wunused-function") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif - -#if defined(HEDLEY_DEPRECATED) -# undef HEDLEY_DEPRECATED -#endif -#if defined(HEDLEY_DEPRECATED_FOR) -# undef HEDLEY_DEPRECATED_FOR -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) -# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) -#elif \ - (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) -#elif defined(__cplusplus) && (__cplusplus >= 201402L) -# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) -# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(deprecated) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") -# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") -#else -# define HEDLEY_DEPRECATED(since) -# define HEDLEY_DEPRECATED_FOR(since, replacement) -#endif - -#if defined(HEDLEY_UNAVAILABLE) -# undef HEDLEY_UNAVAILABLE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warning) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) -#else -# define HEDLEY_UNAVAILABLE(available_since) -#endif - -#if defined(HEDLEY_WARN_UNUSED_RESULT) -# undef HEDLEY_WARN_UNUSED_RESULT -#endif -#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) -# undef HEDLEY_WARN_UNUSED_RESULT_MSG -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) -#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#elif defined(_Check_return_) /* SAL */ -# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ -#else -# define HEDLEY_WARN_UNUSED_RESULT -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) -#endif - -#if defined(HEDLEY_SENTINEL) -# undef HEDLEY_SENTINEL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(sentinel) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) -#else -# define HEDLEY_SENTINEL(position) -#endif - -#if defined(HEDLEY_NO_RETURN) -# undef HEDLEY_NO_RETURN -#endif -#if HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NO_RETURN __noreturn -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -# define HEDLEY_NO_RETURN _Noreturn -#elif defined(__cplusplus) && (__cplusplus >= 201103L) -# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(noreturn) || \ - HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_NO_RETURN _Pragma("does_not_return") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NO_RETURN __attribute((noreturn)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#else -# define HEDLEY_NO_RETURN -#endif - -#if defined(HEDLEY_NO_ESCAPE) -# undef HEDLEY_NO_ESCAPE -#endif -#if HEDLEY_HAS_ATTRIBUTE(noescape) -# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) -#else -# define HEDLEY_NO_ESCAPE -#endif - -#if defined(HEDLEY_UNREACHABLE) -# undef HEDLEY_UNREACHABLE -#endif -#if defined(HEDLEY_UNREACHABLE_RETURN) -# undef HEDLEY_UNREACHABLE_RETURN -#endif -#if defined(HEDLEY_ASSUME) -# undef HEDLEY_ASSUME -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ASSUME(expr) __assume(expr) -#elif HEDLEY_HAS_BUILTIN(__builtin_assume) -# define HEDLEY_ASSUME(expr) __builtin_assume(expr) -#elif \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# if defined(__cplusplus) -# define HEDLEY_ASSUME(expr) std::_nassert(expr) -# else -# define HEDLEY_ASSUME(expr) _nassert(expr) -# endif -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNREACHABLE() __builtin_unreachable() -#elif defined(HEDLEY_ASSUME) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif -#if !defined(HEDLEY_ASSUME) -# if defined(HEDLEY_UNREACHABLE) -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) -# else -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) -# endif -#endif -#if defined(HEDLEY_UNREACHABLE) -# if \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) -# else -# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() -# endif -#else -# define HEDLEY_UNREACHABLE_RETURN(value) return (value) -#endif -#if !defined(HEDLEY_UNREACHABLE) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wpedantic") -# pragma clang diagnostic ignored "-Wpedantic" -#endif -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif -#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) -# if defined(__clang__) -# pragma clang diagnostic ignored "-Wvariadic-macros" -# elif defined(HEDLEY_GCC_VERSION) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif -#endif -#if defined(HEDLEY_NON_NULL) -# undef HEDLEY_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) -#else -# define HEDLEY_NON_NULL(...) 
-#endif -HEDLEY_DIAGNOSTIC_POP - -#if defined(HEDLEY_PRINTF_FORMAT) -# undef HEDLEY_PRINTF_FORMAT -#endif -#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) -#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) -#elif \ - HEDLEY_HAS_ATTRIBUTE(format) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) -#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) -#else -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) -#endif - -#if defined(HEDLEY_CONSTEXPR) -# undef HEDLEY_CONSTEXPR -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) -# endif -#endif -#if !defined(HEDLEY_CONSTEXPR) -# define HEDLEY_CONSTEXPR -#endif - -#if defined(HEDLEY_PREDICT) -# undef HEDLEY_PREDICT -#endif -#if defined(HEDLEY_LIKELY) -# undef HEDLEY_LIKELY -#endif -#if defined(HEDLEY_UNLIKELY) -# undef HEDLEY_UNLIKELY -#endif -#if defined(HEDLEY_UNPREDICTABLE) -# undef HEDLEY_UNPREDICTABLE -#endif -#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) -# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) -# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) -# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) -# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) -# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) -#elif \ - (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, expected, probability) \ - (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) -# define HEDLEY_PREDICT_TRUE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ - })) -# define HEDLEY_PREDICT_FALSE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ - })) -# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) -# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) -#else -# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) -# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) -# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) -# define HEDLEY_LIKELY(expr) (!!(expr)) -# define HEDLEY_UNLIKELY(expr) (!!(expr)) -#endif -#if !defined(HEDLEY_UNPREDICTABLE) -# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) -#endif - -#if defined(HEDLEY_MALLOC) -# undef HEDLEY_MALLOC -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(malloc) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_MALLOC __attribute__((__malloc__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_MALLOC _Pragma("returns_new_memory") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_MALLOC __declspec(restrict) -#else -# define HEDLEY_MALLOC -#endif - -#if defined(HEDLEY_PURE) -# undef HEDLEY_PURE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(pure) || \ - HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PURE __attribute__((__pure__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_PURE _Pragma("does_not_write_global_data") -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ - ) -# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") -#else -# define HEDLEY_PURE -#endif - -#if defined(HEDLEY_CONST) -# undef HEDLEY_CONST -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(const) || \ - HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_CONST __attribute__((__const__)) -#elif \ - HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_CONST _Pragma("no_side_effect") -#else -# define HEDLEY_CONST HEDLEY_PURE -#endif - -#if defined(HEDLEY_RESTRICT) -# undef HEDLEY_RESTRICT -#endif -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) -# define HEDLEY_RESTRICT restrict -#elif \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - defined(__clang__) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RESTRICT __restrict -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) -# define HEDLEY_RESTRICT _Restrict -#else -# define HEDLEY_RESTRICT -#endif - -#if defined(HEDLEY_INLINE) -# undef HEDLEY_INLINE -#endif -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - (defined(__cplusplus) && (__cplusplus >= 199711L)) -# define HEDLEY_INLINE inline -#elif \ - defined(HEDLEY_GCC_VERSION) || \ - HEDLEY_ARM_VERSION_CHECK(6,2,0) -# define HEDLEY_INLINE __inline__ -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_INLINE __inline -#else -# define HEDLEY_INLINE -#endif - -#if defined(HEDLEY_ALWAYS_INLINE) -# undef HEDLEY_ALWAYS_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(always_inline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ALWAYS_INLINE __forceinline -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ - ) -# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") -#else -# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE -#endif - -#if defined(HEDLEY_NEVER_INLINE) -# undef HEDLEY_NEVER_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(noinline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) -# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NEVER_INLINE _Pragma("inline=never") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NEVER_INLINE __attribute((noinline)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#else -# define HEDLEY_NEVER_INLINE -#endif - -#if defined(HEDLEY_PRIVATE) -# undef HEDLEY_PRIVATE -#endif -#if defined(HEDLEY_PUBLIC) -# undef HEDLEY_PUBLIC -#endif -#if defined(HEDLEY_IMPORT) -# undef HEDLEY_IMPORT -#endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC __declspec(dllexport) -# define HEDLEY_IMPORT __declspec(dllimport) -#else -# if \ - HEDLEY_HAS_ATTRIBUTE(visibility) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - ( \ - defined(__TI_EABI__) && \ - ( \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ - ) \ - ) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) -# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) -# else -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC -# endif -# define HEDLEY_IMPORT extern -#endif - -#if defined(HEDLEY_NO_THROW) -# undef HEDLEY_NO_THROW -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nothrow) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_THROW __attribute__((__nothrow__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NO_THROW __declspec(nothrow) -#else -# define HEDLEY_NO_THROW -#endif - -#if defined(HEDLEY_FALL_THROUGH) -# undef HEDLEY_FALL_THROUGH -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_FALL_THROUGH -#elif \ - HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) -#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) -#elif defined(__fallthrough) /* SAL */ -# define HEDLEY_FALL_THROUGH __fallthrough -#else -# define HEDLEY_FALL_THROUGH -#endif - -#if defined(HEDLEY_RETURNS_NON_NULL) -# undef HEDLEY_RETURNS_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) -#elif defined(_Ret_notnull_) /* SAL */ -# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ -#else -# define HEDLEY_RETURNS_NON_NULL -#endif - -#if defined(HEDLEY_ARRAY_PARAM) -# undef HEDLEY_ARRAY_PARAM -#endif -#if \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - !defined(__STDC_NO_VLA__) && \ - !defined(__cplusplus) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_ARRAY_PARAM(name) (name) -#else -# define 
HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(HEDLEY_IS_CONSTANT) -# undef HEDLEY_IS_CONSTANT -#endif -#if defined(HEDLEY_REQUIRE_CONSTEXPR) -# undef HEDLEY_REQUIRE_CONSTEXPR -#endif -/* HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_IS_CONSTEXPR_) -# undef HEDLEY_IS_CONSTEXPR_ -#endif -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -# if \ - HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) -# endif -# elif \ - ( \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(HEDLEY_SUNPRO_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION)) || \ - (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) -# endif -# elif \ - defined(HEDLEY_GCC_VERSION) || \ - defined(HEDLEY_INTEL_VERSION) || \ - defined(HEDLEY_TINYC_VERSION) || \ - defined(HEDLEY_TI_ARMCL_VERSION) || \ - HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ - defined(HEDLEY_TI_CL2000_VERSION) || \ - defined(HEDLEY_TI_CL6X_VERSION) || \ - defined(HEDLEY_TI_CL7X_VERSION) || \ - defined(HEDLEY_TI_CLPRU_VERSION) || \ - defined(__clang__) -# define HEDLEY_IS_CONSTEXPR_(expr) ( \ - sizeof(void) != \ - sizeof(*( \ - 1 ? \ - ((void*) ((expr) * 0L) ) : \ - ((struct { char v[sizeof(void) * 2]; } *) 1) \ - ) \ - ) \ - ) -# endif -#endif -#if defined(HEDLEY_IS_CONSTEXPR_) -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) -#else -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) (0) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(HEDLEY_BEGIN_C_DECLS) -# undef HEDLEY_BEGIN_C_DECLS -#endif -#if defined(HEDLEY_END_C_DECLS) -# undef HEDLEY_END_C_DECLS -#endif -#if defined(HEDLEY_C_DECL) -# undef HEDLEY_C_DECL -#endif -#if defined(__cplusplus) -# define HEDLEY_BEGIN_C_DECLS extern "C" { -# define HEDLEY_END_C_DECLS } -# define HEDLEY_C_DECL extern "C" -#else -# define HEDLEY_BEGIN_C_DECLS -# define HEDLEY_END_C_DECLS -# define HEDLEY_C_DECL -#endif - -#if defined(HEDLEY_STATIC_ASSERT) -# undef HEDLEY_STATIC_ASSERT -#endif -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) -# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#else -# define HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(HEDLEY_NULL) -# undef HEDLEY_NULL -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) -# elif defined(NULL) -# define HEDLEY_NULL NULL -# else -# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) -# endif -#elif defined(NULL) -# define HEDLEY_NULL NULL -#else -# define HEDLEY_NULL ((void*) 0) -#endif - -#if defined(HEDLEY_MESSAGE) -# undef HEDLEY_MESSAGE -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_MESSAGE(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(message msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_WARNING) -# undef HEDLEY_WARNING -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_WARNING(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(clang warning msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_REQUIRE) -# undef HEDLEY_REQUIRE -#endif -#if defined(HEDLEY_REQUIRE_MSG) -# undef HEDLEY_REQUIRE_MSG -#endif -#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) -# if HEDLEY_HAS_WARNING("-Wgcc-compat") -# define HEDLEY_REQUIRE(expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# define HEDLEY_REQUIRE_MSG(expr,msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) -# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) -# endif -#else -# define HEDLEY_REQUIRE(expr) -# define HEDLEY_REQUIRE_MSG(expr,msg) -#endif - -#if defined(HEDLEY_FLAGS) -# undef HEDLEY_FLAGS -#endif -#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) -# define HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else -# define HEDLEY_FLAGS -#endif - -#if defined(HEDLEY_FLAGS_CAST) -# undef HEDLEY_FLAGS_CAST -#endif -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) -# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)") \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(HEDLEY_EMPTY_BASES) -# undef HEDLEY_EMPTY_BASES -#endif -#if \ - (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else -# define HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. */ - -#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) -# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) -#else -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_BUILTIN) -# undef HEDLEY_CLANG_HAS_BUILTIN -#endif -#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) - -#if defined(HEDLEY_CLANG_HAS_FEATURE) -# undef HEDLEY_CLANG_HAS_FEATURE -#endif -#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) - -#if defined(HEDLEY_CLANG_HAS_EXTENSION) -# undef HEDLEY_CLANG_HAS_EXTENSION -#endif -#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) - -#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_WARNING) -# undef HEDLEY_CLANG_HAS_WARNING -#endif -#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ -/* :: End simde/hedley.h :: */ - -#define SIMDE_VERSION_MAJOR 0 -#define SIMDE_VERSION_MINOR 8 -#define SIMDE_VERSION_MICRO 0 -#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) -// Also update meson.build in the root directory of the repository - -#include -#include - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin 
simde/simde-detect-clang.h :: */ -/* Detect Clang Version - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -/* This file was originally part of SIMDe - * (). You're free to do with it as - * you please, but I do have a few small requests: - * - * * If you make improvements, please submit them back to SIMDe - * (at ) so others can - * benefit from them. - * * Please keep a link to SIMDe intact so people know where to submit - * improvements. - * * If you expose it publicly, please change the SIMDE_ prefix to - * something specific to your project. - * - * The version numbers clang exposes (in the ___clang_major__, - * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. - * Vendors such as Apple will define these values to their version - * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but - * __clang_major__ and __clang_minor__ are defined to 4 and 0 - * respectively, instead of 3 and 1. - * - * The solution is *usually* to use clang's feature detection macros - * () - * to determine if the feature you're interested in is available. This - * generally works well, and it should probably be the first thing you - * try. Unfortunately, it's not possible to check for everything. In - * particular, compiler bugs. - * - * This file just uses the feature checking macros to detect features - * added in specific versions of clang to identify which version of - * clang the compiler is based on. - * - * Right now it only goes back to 3.6, but I'm happy to accept patches - * to go back further. And, of course, newer versions are welcome if - * they're not already present, and if you find a way to detect a point - * release that would be great, too! - */ - -#if !defined(SIMDE_DETECT_CLANG_H) -#define SIMDE_DETECT_CLANG_H 1 - -/* Attempt to detect the upstream clang version number. I usually only - * worry about major version numbers (at least for 4.0+), but if you - * need more resolution I'm happy to accept patches that are able to - * detect minor versions as well. That said, you'll probably have a - * hard time with detection since AFAIK most minor releases don't add - * anything we can detect. Updated based on - * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 - * - would welcome patches/updates there as well. 
- */ - -#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -# if __has_attribute(unsafe_buffer_usage) // no new warnings in 17.0 -# define SIMDE_DETECT_CLANG_VERSION 170000 -# elif __has_attribute(nouwtable) // no new warnings in 16.0 -# define SIMDE_DETECT_CLANG_VERSION 160000 -# elif __has_warning("-Warray-parameter") -# define SIMDE_DETECT_CLANG_VERSION 150000 -# elif __has_warning("-Wbitwise-instead-of-logical") -# define SIMDE_DETECT_CLANG_VERSION 140000 -# elif __has_warning("-Waix-compat") -# define SIMDE_DETECT_CLANG_VERSION 130000 -# elif __has_warning("-Wformat-insufficient-args") -# define SIMDE_DETECT_CLANG_VERSION 120000 -# elif __has_warning("-Wimplicit-const-int-float-conversion") -# define SIMDE_DETECT_CLANG_VERSION 110000 -# elif __has_warning("-Wmisleading-indentation") -# define SIMDE_DETECT_CLANG_VERSION 100000 -# elif defined(__FILE_NAME__) -# define SIMDE_DETECT_CLANG_VERSION 90000 -# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) -# define SIMDE_DETECT_CLANG_VERSION 80000 -// For reasons unknown, Xcode 10.3 (Apple LLVM version 10.0.1) is apparently -// based on Clang 7, but does not support the warning we test. -// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and -// https://trac.macports.org/wiki/XcodeVersionInfo. -# elif __has_warning("-Wc++98-compat-extra-semi") || \ - (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) -# define SIMDE_DETECT_CLANG_VERSION 70000 -# elif __has_warning("-Wpragma-pack") -# define SIMDE_DETECT_CLANG_VERSION 60000 -# elif __has_warning("-Wbitfield-enum-conversion") -# define SIMDE_DETECT_CLANG_VERSION 50000 -# elif __has_attribute(diagnose_if) -# define SIMDE_DETECT_CLANG_VERSION 40000 -# elif __has_warning("-Wcomma") -# define SIMDE_DETECT_CLANG_VERSION 39000 -# elif __has_warning("-Wdouble-promotion") -# define SIMDE_DETECT_CLANG_VERSION 38000 -# elif __has_warning("-Wshift-negative-value") -# define SIMDE_DETECT_CLANG_VERSION 37000 -# elif __has_warning("-Wambiguous-ellipsis") -# define SIMDE_DETECT_CLANG_VERSION 36000 -# else -# define SIMDE_DETECT_CLANG_VERSION 1 -# endif -#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ - -/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty - * straightforward; it returns true if the compiler is a derivative - * of clang >= the specified version. - * - * Since this file is often (primarily?) useful for working around bugs - * it is also helpful to have a macro which returns true if only if the - * compiler is a version of clang *older* than the specified version to - * make it a bit easier to ifdef regions to add code for older versions, - * such as pragmas to disable a specific warning. 
*/ - -#if defined(SIMDE_DETECT_CLANG_VERSION) -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) -#else -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) -#endif - -#endif /* !defined(SIMDE_DETECT_CLANG_H) */ -/* :: End simde/simde-detect-clang.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-arch.h :: */ -/* Architecture detection - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - * Different compilers define different preprocessor macros for the - * same architecture. This is an attempt to provide a single - * interface which is usable on any compiler. - * - * In general, a macro named SIMDE_ARCH_* is defined for each - * architecture the CPU supports. When there are multiple possible - * versions, we try to define the macro to the target version. For - * example, if you want to check for i586+, you could do something - * like: - * - * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) - * ... - * #endif - * - * You could also just check that SIMDE_ARCH_X86 >= 5 without checking - * if it's defined first, but some compilers may emit a warning about - * an undefined macro being used (e.g., GCC with -Wundef). - * - * This was originally created for SIMDe - * (hence the prefix), but this - * header has no dependencies and may be used anywhere. It is - * originally based on information from - * , though it - * has been enhanced with additional information. - * - * If you improve this file, or find a bug, please file the issue at - * . If you copy this into - * your project, even if you change the prefix, please keep the links - * to SIMDe intact so others know where to report issues, submit - * enhancements, and find the latest version. 
*/ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -# if defined(__alpha_ev6__) -# define SIMDE_ARCH_ALPHA 6 -# elif defined(__alpha_ev5__) -# define SIMDE_ARCH_ALPHA 5 -# elif defined(__alpha_ev4__) -# define SIMDE_ARCH_ALPHA 4 -# else -# define SIMDE_ARCH_ALPHA 1 -# endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -# define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -# define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -# if !defined(_M_ARM64EC) -# define SIMDE_ARCH_AMD64 1000 -# endif -#endif - -/* ARM - */ -#if defined(__ARM_ARCH) -# if __ARM_ARCH > 100 -# define SIMDE_ARCH_ARM (__ARM_ARCH) -# else -# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) -# endif -#elif defined(_M_ARM) -# if _M_ARM > 100 -# define SIMDE_ARCH_ARM (_M_ARM) -# else -# define SIMDE_ARCH_ARM (_M_ARM * 100) -# endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_ARM 800 -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -# define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) -#else -# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -# define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -# elif defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -# endif -#endif -#if defined(__ARM_FEATURE_SVE) -# define SIMDE_ARCH_ARM_SVE -#endif -#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA -# define SIMDE_ARCH_ARM_FMA -#endif -#if defined(__ARM_FEATURE_CRYPTO) -# define SIMDE_ARCH_ARM_CRYPTO -#endif -#if defined(__ARM_FEATURE_QRDMX) -# define SIMDE_ARCH_ARM_QRDMX -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -# define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -# define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) -# define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -# define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -# define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -# define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -# define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -# define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -# define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -# define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -# define 
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
*/ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. 
*/ -#if \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ -#endif - -/* This warning needs a lot of work. It is triggered if all you do is - * pass the value to memcpy/__builtin_memcpy, or if you initialize a - * member of the union, even if that member takes up the entire union. - * Last tested with clang-10, hopefully things will improve in the - * future; if clang fixes this I'd love to enable it. */ -#if \ - HEDLEY_HAS_WARNING("-Wconditional-uninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ -#endif - -/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which - * will is false. However, SIMDe uses these operations exclusively - * for things like _mm_cmpeq_ps, for which we really do want to check - * for equality (or inequality). - * - * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro - * which just wraps a check in some code do disable this diagnostic I'd - * be happy to accept it. */ -#if \ - HEDLEY_HAS_WARNING("-Wfloat-equal") || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ -#endif - -/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. - * If Hedley can't find an implementation it will preprocess to - * nothing, which means there will be a trailing semi-colon. */ -#if HEDLEY_HAS_WARNING("-Wextra-semi") - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") -#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ -#endif - -/* We do use a few variadic macros, which technically aren't available - * until C99 and C++11, but every compiler I'm aware of has supported - * them for much longer. That said, usage is isolated to the test - * suite and compilers known to support them. */ -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) - #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#endif - -/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro - * before we can access certain SIMD intrinsics, but this diagnostic - * warns about it being a reserved name. It is a reserved name, but - * it's reserved for the compiler and we are using it to convey - * information to the compiler. - * - * This is also used when enabling native aliases since we don't get to - * choose the macro names. 
*/ -#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#endif - -/* Similar to above; types like simde__m128i are reserved due to the - * double underscore, but we didn't choose them, Intel did. */ -#if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ -#endif - -/* clang 3.8 warns about the packed attribute being unnecessary when - * used in the _mm_loadu_* functions. That *may* be true for version - * 3.8, but for later versions it is crucial in order to make unaligned - * access safe. */ -#if HEDLEY_HAS_WARNING("-Wpacked") - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ -#endif - -/* Triggered when assigning a float to a double implicitly. We use - * explicit casts in SIMDe, this is only used in the test suite. */ -#if HEDLEY_HAS_WARNING("-Wdouble-promotion") - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -/* Several compilers treat conformant array parameters as VLAs. We - * test to make sure we're in C mode (C++ doesn't support CAPs), and - * that the version of the standard supports CAPs. We also reject - * some buggy compilers like MSVC (the logic is in Hedley if you want - * to take a look), but with certain warnings enabled some compilers - * still like to emit a diagnostic. */ -#if HEDLEY_HAS_WARNING("-Wvla") - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ -#endif - -/* If you add an unused attribute to a function and don't use it, clang - * may emit this. 
*/ -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpass-failed") - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpadded") - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ -#endif - -#if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ -#endif - -#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ -#endif - -/* clang will emit this warning when we use C99 extensions whan not in - * C99 mode, even though it does support this. In such cases we check - * the compiler and version first, so we know it's not a problem. */ -#if HEDLEY_HAS_WARNING("-Wc99-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -#endif - -/* Similar problm as above; we rely on some basic C99 support, but clang - * has started warning obut this even in C17 mode with -Weverything. */ -#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ -#endif - -/* https://github.com/simd-everywhere/simde/issues/277 */ -#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ -#endif - -/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS - * to silence, but you have to do that before including anything and - * that would require reordering includes. */ -#if defined(_MSC_VER) - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ -#endif - -/* Some compilers, such as clang, may use `long long` for 64-bit - * integers, but `long long` triggers a diagnostic with - * -Wc++98-compat-pedantic which says 'long long' is incompatible with - * C++98. 
*/ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ - _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ -#endif - -/* Some problem as above */ -#if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ -#endif - -/* emscripten emits this whenever stdin/stdout/stderr is used in a - * macro. */ -#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ -#endif - -/* Clang uses C11 generic selections to implement some AltiVec - * functions, which triggers this diagnostic when not compiling - * in C11 mode */ -#if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ -#endif - -/* Clang sometimes triggers this warning in macros in the AltiVec and - * NEON headers, or due to missing functions. */ -#if HEDLEY_HAS_WARNING("-Wvector-conversion") - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") - /* For NEON, the situation with -Wvector-conversion in clang < 10 is - * bad enough that we just disable the warning altogether. On x86, - * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ - #if \ - (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ - SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ -#endif - -/* Prior to 5.0, clang didn't support disabling diagnostics in - * statement exprs. As a result, some macros we use don't - * properly silence warnings. 
*/ -#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ -#endif - -/* SLEEF triggers this a *lot* in their headers */ -#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#endif - -/* GCC emits this under some circumstances when using __int128 */ -#if HEDLEY_GCC_VERSION_CHECK(4,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -#endif - -/* MSVC doesn't like (__assume(0), code) and will warn about code being - * unreachable, but we want it there because not all compilers - * understand the unreachable macro and will complain if it is missing. - * I'm planning on adding a new macro to Hedley to handle this a bit - * more elegantly, but until then... */ -#if defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) -#elif defined(__clang__) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ -#endif - -/* This is a false positive from GCC in a few places. */ -#if HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif - -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#else - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ -#endif - -/* Some native functions on E2K with instruction set < v6 are declared - * as deprecated due to inefficiency. Still they are more efficient - * than SIMDe implementation. So we're using them, and switching off - * these deprecation warnings. 
*/ -#if defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") -#else -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS -#endif - -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ - HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ - SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ - SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ - SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ - SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ - SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ - SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ - -#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ -/* :: End simde/simde-diagnostic.h :: */ - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SVML) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) - #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) - #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BITALG) - #define SIMDE_X86_AVX512BITALG_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI) - #define SIMDE_X86_AVX512VBMI_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI2) - #define SIMDE_X86_AVX512VBMI2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VNNI) - #define SIMDE_X86_AVX512VNNI_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) - #define SIMDE_X86_AVX5124VNNIW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512CD) - #define SIMDE_X86_AVX512CD_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512DQ) - #define SIMDE_X86_AVX512DQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VL) - #define SIMDE_X86_AVX512VL_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BW) - #define SIMDE_X86_AVX512BW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && !defined(SIMDE_X86_AVX512FP16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512FP16) - #define SIMDE_X86_AVX512FP16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BF16) - #define SIMDE_X86_AVX512BF16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512F) - #define SIMDE_X86_AVX512F_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_NATIVE -#endif - -#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_FMA) - #define SIMDE_X86_FMA_NATIVE - #endif -#endif -#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX2) - #define SIMDE_X86_AVX2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX) - #define SIMDE_X86_AVX_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_XOP) - #define SIMDE_X86_XOP_NATIVE - #endif -#endif -#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_2) - #define SIMDE_X86_SSE4_2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_1) - #define SIMDE_X86_SSE4_1_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSSE3) - #define SIMDE_X86_SSSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE3) - #define SIMDE_X86_SSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_AES_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AES) - #define SIMDE_X86_AES_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE2) - #define SIMDE_X86_SSE2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE) - #define SIMDE_X86_SSE_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_MMX) - #define SIMDE_X86_MMX_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_GFNI) - #define SIMDE_X86_GFNI_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_PCLMUL) - #define SIMDE_X86_PCLMUL_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) - #define SIMDE_X86_VPCLMULQDQ_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_F16C) - #define SIMDE_X86_F16C_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86) && 
(defined(__INTEL_COMPILER) || (HEDLEY_MSVC_VERSION_CHECK(14, 20, 0) && !defined(__clang__))) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(push) - #pragma warning(disable:4799) -#endif - -#if \ - defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) || defined(SIMDE_X86_SVML_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_1_NATIVE) - #include -#elif defined(SIMDE_X86_SSSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE_NATIVE) - #include -#elif defined(SIMDE_X86_MMX_NATIVE) - #include -#endif - -#if defined(SIMDE_X86_XOP_NATIVE) - #if defined(_MSC_VER) - #include - #else - #include - #endif -#endif - -#if defined(SIMDE_X86_AES_NATIVE) - #include -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(pop) -#endif - -#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) - #define SIMDE_ARM_NEON_A64V8_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) - #define SIMDE_ARM_NEON_A32V8_NATIVE - #endif -#endif -#if defined(__ARM_ACLE) - #include -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) - #define SIMDE_ARM_NEON_A32V7_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include - #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - #include - #endif -#endif - -#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_SVE) - #define SIMDE_ARM_SVE_NATIVE - #include - #endif -#endif - -#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_SIMD128) - #define SIMDE_WASM_SIMD128_NATIVE - #endif -#endif - -#if !defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) && !defined(SIMDE_WASM_RELAXED_SIMD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_RELAXED_SIMD) - #define SIMDE_WASM_RELAXED_SIMD_NATIVE - #endif -#endif -#if defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) - #include -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) - #define SIMDE_POWER_ALTIVEC_P9_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) - #define 
SIMDE_POWER_ALTIVEC_P7_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) - #define SIMDE_POWER_ALTIVEC_P7_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_15_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_14_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_13_NATIVE - #endif -#endif - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - /* AltiVec conflicts with lots of stuff. The bool keyword conflicts - * with the bool keyword in C++ and the bool macro in C99+ (defined - * in stdbool.h). The vector keyword conflicts with std::vector in - * C++ if you are `using std;`. - * - * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` - * instead, but altivec.h will unconditionally define - * `vector`/`bool`/`pixel` so we need to work around that. - * - * Unfortunately this means that if your code uses AltiVec directly - * it may break. If this is the case you'll want to define - * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even - * better, port your code to use the double-underscore versions. */ - #if defined(bool) - #undef bool - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #include - - #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #if defined(vector) - #undef vector - #endif - #if defined(pixel) - #undef pixel - #endif - #if defined(bool) - #undef bool - #endif - #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #include - #endif - - /* Use these intsead of vector/pixel/bool in SIMDe. 
*/ - #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T - #define SIMDE_POWER_ALTIVEC_PIXEL __pixel - #define SIMDE_POWER_ALTIVEC_BOOL __bool - - /* Re-define bool if we're using stdbool.h */ - #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #define bool _Bool - #endif -#endif - -#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) - #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_MSA) - #define SIMDE_MIPS_MSA_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_MSA_NATIVE) - #include -#endif - -/* This is used to determine whether or not to fall back on a vector - * function in an earlier ISA extensions, as well as whether - * we expected any attempts at vectorization to be fruitful or if we - * expect to always be running serial code. - * - * Note that, for some architectures (okay, *one* architecture) there - * can be a split where some types are supported for one vector length - * but others only for a shorter length. Therefore, it is possible to - * provide separate values for float/int/double types. */ - -#if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (512) - #elif defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (256) - #elif defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || \ - defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ - defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (128) - #elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) - #endif - - #if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE - #else - #define SIMDE_NATURAL_VECTOR_SIZE (0) - #endif - #endif - - #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif -#endif - -#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) 
((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) - -/* Native aliases */ -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_FMA_NATIVE) - #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VL_NATIVE) - #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) - #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) - #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BW_NATIVE) - #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) - #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) - #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BF16_NATIVE) - #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) - #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) - #define SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512DQ_NATIVE) - #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512CD_NATIVE) - #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512FP16_NATIVE) - #define SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_GFNI_NATIVE) - #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_PCLMUL_NATIVE) - #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) - #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_F16C_NATIVE) - #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AES_NATIVE) - #define 
SIMDE_X86_AES_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SVML_NATIVE) - #define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_SVE_NATIVE) - #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_WASM_SIMD128_NATIVE) - #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES - #endif -#endif - -/* Are floating point values stored using IEEE 754? Knowing - * this at during preprocessing is a bit tricky, mostly because what - * we're curious about is how values are stored and not whether the - * implementation is fully conformant in terms of rounding, NaN - * handling, etc. - * - * For example, if you use -ffast-math or -Ofast on - * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 - * support is not advertised (by defining __STDC_IEC_559__). - * - * However, what we care about is whether it is safe to assume that - * floating point values are stored in IEEE 754 format, in which case - * we can provide faster implementations of some functions. - * - * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- - * so we just assume IEEE 754 for now. There is a test which verifies - * this, if that test fails sowewhere please let us know and we'll add - * an exception for that platform. Meanwhile, you can define - * SIMDE_NO_IEEE754_STORAGE. */ -#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) - #define SIMDE_IEEE754_STORAGE -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_FP16) - #define SIMDE_ARM_NEON_FP16 -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_BF16) - #define SIMDE_ARM_NEON_BF16 -#endif - -#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LASX) - #define SIMDE_LOONGARCH_LASX_NATIVE - #endif -#endif - -#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LSX) - #define SIMDE_LOONGARCH_LSX_NATIVE - #endif -#endif - -#if defined(SIMDE_LOONGARCH_LASX_NATIVE) - #include -#endif -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - #include -#endif - -#endif /* !defined(SIMDE_FEATURES_H) */ -/* :: End simde/simde-features.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-math.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -/* Attempt to find math functions. Functions may be in , - * , compiler built-ins/intrinsics, or platform/architecture - * specific headers. In some cases, especially those not built in to - * libm, we may need to define our own implementations. */ - -#if !defined(SIMDE_MATH_H) -#define SIMDE_MATH_H 1 - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#include -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -/* SLEEF support - * https://sleef.org/ - * - * If you include prior to including SIMDe, SIMDe will use - * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to - * including SIMDe to force the issue. - * - * Note that SLEEF does requires linking to libsleef. - * - * By default, SIMDe will use the 1 ULP functions, but if you use - * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is - * only the case for the simde_math_* functions; for code in other - * SIMDe headers which calls SLEEF directly we may use functions with - * greater error if the API we're implementing is less precise (for - * example, SVML guarantees 4 ULP, so we will generally use the 3.5 - * ULP functions from SLEEF). */ -#if !defined(SIMDE_MATH_SLEEF_DISABLE) - #if defined(__SLEEF_H__) - #define SIMDE_MATH_SLEEF_ENABLE - #endif -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ - #include - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) - #if defined(SLEEF_VERSION_MAJOR) - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #endif -#else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(__has_builtin) - #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) - #define SIMDE_MATH_BUILTIN_LIBM(func) (1) -#else - #define SIMDE_MATH_BUILTIN_LIBM(func) (0) -#endif - -#if defined(HUGE_VAL) - /* Looks like or has already been included. */ - - /* The math.h from libc++ (yes, the C header from the C++ standard - * library) will define an isnan function, but not an isnan macro - * like the C standard requires. So we detect the header guards - * macro libc++ uses. 
*/ - #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) - #define SIMDE_MATH_HAVE_MATH_H - #elif defined(__cplusplus) - #define SIMDE_MATH_HAVE_CMATH - #endif -#elif defined(__has_include) - #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() - #define SIMDE_MATH_HAVE_CMATH - #include - #elif __has_include() - #define SIMDE_MATH_HAVE_MATH_H - #include - #elif !defined(SIMDE_MATH_NO_LIBM) - #define SIMDE_MATH_NO_LIBM - #endif -#elif !defined(SIMDE_MATH_NO_LIBM) - #if defined(__cplusplus) && (__cplusplus >= 201103L) - #define SIMDE_MATH_HAVE_CMATH - HEDLEY_DIAGNOSTIC_PUSH - #if defined(HEDLEY_MSVC_VERSION) - /* VS 14 emits this diagnostic about noexcept being used on a - * function, which we can't do anything about. */ - #pragma warning(disable:4996) - #endif - #include - HEDLEY_DIAGNOSTIC_POP - #else - #define SIMDE_MATH_HAVE_MATH_H - #include - #endif -#endif - -#if !defined(SIMDE_MATH_INFINITY) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_INFINITY (__builtin_inf()) - #elif defined(INFINITY) - #define SIMDE_MATH_INFINITY INFINITY - #endif -#endif - -#if !defined(SIMDE_INFINITYF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inff) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_INFINITYF (__builtin_inff()) - #elif defined(INFINITYF) - #define SIMDE_MATH_INFINITYF INFINITYF - #elif defined(SIMDE_MATH_INFINITY) - #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) - #endif -#endif - -#if !defined(SIMDE_MATH_NAN) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nan) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_NAN (__builtin_nan("")) - #elif defined(NAN) - #define SIMDE_MATH_NAN NAN - #endif -#endif - -#if !defined(SIMDE_NANF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_NANF (__builtin_nanf("")) - #elif defined(NANF) - #define SIMDE_MATH_NANF NANF - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) - #endif -#endif - -#if !defined(SIMDE_MATH_PI) - #if defined(M_PI) - #define SIMDE_MATH_PI M_PI - #else - #define SIMDE_MATH_PI 3.14159265358979323846 - #endif -#endif - -#if !defined(SIMDE_MATH_PIF) - #if defined(M_PI) - #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) - #else - #define SIMDE_MATH_PIF 3.14159265358979323846f - #endif -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180) - #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180F) - #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f -#endif - -#if !defined(SIMDE_MATH_180_OVER_PI) - #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 -#endif - -#if !defined(SIMDE_MATH_180_OVER_PIF) - #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f -#endif - -#if 
!defined(SIMDE_MATH_FLT_MIN) - #if defined(__FLT_MIN__) - #define SIMDE_MATH_FLT_MIN __FLT_MIN__ - #else - #if !defined(FLT_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MIN FLT_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_FLT_MAX) - #if defined(__FLT_MAX__) - #define SIMDE_MATH_FLT_MAX __FLT_MAX__ - #else - #if !defined(FLT_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MAX FLT_MAX - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MIN) - #if defined(__DBL_MIN__) - #define SIMDE_MATH_DBL_MIN __DBL_MIN__ - #else - #if !defined(DBL_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MIN DBL_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MAX) - #if defined(__DBL_MAX__) - #define SIMDE_MATH_DBL_MAX __DBL_MAX__ - #else - #if !defined(DBL_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MAX DBL_MAX - #endif -#endif - -/*** Classification macros from C99 ***/ - -#if !defined(simde_math_isinf) - #if SIMDE_MATH_BUILTIN_LIBM(isinf) - #define simde_math_isinf(v) __builtin_isinf(v) - #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isinf(v) isinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinf(v) std::isinf(v) - #endif -#endif - -#if !defined(simde_math_isinff) - #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define simde_math_isinff(v) __builtin_isinff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinff(v) std::isinf(v) - #elif defined(simde_math_isinf) - #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnan) - #if SIMDE_MATH_BUILTIN_LIBM(isnan) - #define simde_math_isnan(v) __builtin_isnan(v) - #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnan(v) isnan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnan(v) std::isnan(v) - #endif -#endif - -#if !defined(simde_math_isnanf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ - #define simde_math_isnanf(v) __builtin_isnanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnanf(v) std::isnan(v) - #elif defined(simde_math_isnan) - #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnormal) - #if SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormal(v) __builtin_isnormal(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormal(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormal(v) std::isnormal(v) - #endif -#endif - -#if !defined(simde_math_isnormalf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) - #define simde_math_isnormalf(v) __builtin_isnormalf(v) - #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormalf(v) __builtin_isnormal(v) - #elif defined(isnormalf) - #define simde_math_isnormalf(v) isnormalf(v) - #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormalf(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormalf(v) std::isnormal(v) - #elif defined(simde_math_isnormal) - #define simde_math_isnormalf(v) 
simde_math_isnormal(v) - #endif -#endif - -#if !defined(simde_math_issubnormalf) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) - #endif -#endif - -#if !defined(simde_math_issubnormal) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormal(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) - #endif -#endif - -#if defined(FP_NAN) - #define SIMDE_MATH_FP_NAN FP_NAN -#else - #define SIMDE_MATH_FP_NAN 0 -#endif -#if defined(FP_INFINITE) - #define SIMDE_MATH_FP_INFINITE FP_INFINITE -#else - #define SIMDE_MATH_FP_INFINITE 1 -#endif -#if defined(FP_ZERO) - #define SIMDE_MATH_FP_ZERO FP_ZERO -#else - #define SIMDE_MATH_FP_ZERO 2 -#endif -#if defined(FP_SUBNORMAL) - #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL -#else - #define SIMDE_MATH_FP_SUBNORMAL 3 -#endif -#if defined(FP_NORMAL) - #define SIMDE_MATH_FP_NORMAL FP_NORMAL -#else - #define SIMDE_MATH_FP_NORMAL 4 -#endif - -static HEDLEY_INLINE -int -simde_math_fpclassifyf(float v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0f) ? SIMDE_MATH_FP_ZERO : - simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -static HEDLEY_INLINE -int -simde_math_fpclassify(double v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0) ? SIMDE_MATH_FP_ZERO : - simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -#define SIMDE_MATH_FP_QNAN 0x01 -#define SIMDE_MATH_FP_PZERO 0x02 -#define SIMDE_MATH_FP_NZERO 0x04 -#define SIMDE_MATH_FP_PINF 0x08 -#define SIMDE_MATH_FP_NINF 0x10 -#define SIMDE_MATH_FP_DENORMAL 0x20 -#define SIMDE_MATH_FP_NEGATIVE 0x40 -#define SIMDE_MATH_FP_SNAN 0x80 - -static HEDLEY_INLINE -uint8_t -simde_math_fpclassf(float v, const int imm8) { - union { - float f; - uint32_t u; - } fu; - fu.f = v; - uint32_t bits = fu.u; - uint8_t NegNum = (bits >> 31) & 1; - uint32_t const ExpMask = 0x3F800000; // [30:23] - uint32_t const MantMask = 0x007FFFFF; // [22:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 22) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -static HEDLEY_INLINE -uint8_t -simde_math_fpclass(double v, const int imm8) { - union { - double d; - uint64_t u; - } du; - du.d = v; - uint64_t bits = du.u; - uint8_t NegNum = (bits >> 63) & 1; - uint64_t const ExpMask = 0x3FF0000000000000; // [62:52] - uint64_t const MantMask = 0x000FFFFFFFFFFFFF; // [51:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 51) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -/*** Manipulation functions ***/ - -#if !defined(simde_math_nextafter) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafter(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafter(x, y) 
nextafter(x, y) - #endif -#endif - -#if !defined(simde_math_nextafterf) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafterf(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafterf(x, y) nextafterf(x, y) - #endif -#endif - -/*** Functions from C99 ***/ - -#if !defined(simde_math_abs) - #if SIMDE_MATH_BUILTIN_LIBM(abs) - #define simde_math_abs(v) __builtin_abs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_abs(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_abs(v) abs(v) - #endif -#endif - -#if !defined(simde_math_labs) - #if SIMDE_MATH_BUILTIN_LIBM(labs) - #define simde_math_labs(v) __builtin_labs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_labs(v) std::labs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_labs(v) labs(v) - #endif -#endif - -#if !defined(simde_math_llabs) - #if SIMDE_MATH_BUILTIN_LIBM(llabs) - #define simde_math_llabs(v) __builtin_llabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_llabs(v) std::llabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_llabs(v) llabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_acos) - #if SIMDE_MATH_BUILTIN_LIBM(acos) - #define simde_math_acos(v) __builtin_acos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acos(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acos(v) acos(v) - #endif -#endif - -#if !defined(simde_math_acosf) - #if SIMDE_MATH_BUILTIN_LIBM(acosf) - #define simde_math_acosf(v) __builtin_acosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosf(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosf(v) acosf(v) - #endif -#endif - -#if !defined(simde_math_acosh) - #if SIMDE_MATH_BUILTIN_LIBM(acosh) - #define simde_math_acosh(v) __builtin_acosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosh(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosh(v) acosh(v) - #endif -#endif - -#if !defined(simde_math_acoshf) - #if SIMDE_MATH_BUILTIN_LIBM(acoshf) - #define simde_math_acoshf(v) __builtin_acoshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acoshf(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acoshf(v) acoshf(v) - #endif -#endif - -#if !defined(simde_math_asin) - #if SIMDE_MATH_BUILTIN_LIBM(asin) - #define simde_math_asin(v) __builtin_asin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asin(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asin(v) asin(v) - #endif -#endif - -#if !defined(simde_math_asinf) - #if SIMDE_MATH_BUILTIN_LIBM(asinf) - #define simde_math_asinf(v) __builtin_asinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinf(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinf(v) asinf(v) - #endif -#endif - -#if 
!defined(simde_math_asinh) - #if SIMDE_MATH_BUILTIN_LIBM(asinh) - #define simde_math_asinh(v) __builtin_asinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinh(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinh(v) asinh(v) - #endif -#endif - -#if !defined(simde_math_asinhf) - #if SIMDE_MATH_BUILTIN_LIBM(asinhf) - #define simde_math_asinhf(v) __builtin_asinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinhf(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinhf(v) asinhf(v) - #endif -#endif - -#if !defined(simde_math_atan) - #if SIMDE_MATH_BUILTIN_LIBM(atan) - #define simde_math_atan(v) __builtin_atan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan(v) atan(v) - #endif -#endif - -#if !defined(simde_math_atan2) - #if SIMDE_MATH_BUILTIN_LIBM(atan2) - #define simde_math_atan2(y, x) __builtin_atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2(y, x) atan2(y, x) - #endif -#endif - -#if !defined(simde_math_atan2f) - #if SIMDE_MATH_BUILTIN_LIBM(atan2f) - #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2f(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2f(y, x) atan2f(y, x) - #endif -#endif - -#if !defined(simde_math_atanf) - #if SIMDE_MATH_BUILTIN_LIBM(atanf) - #define simde_math_atanf(v) __builtin_atanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanf(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanf(v) atanf(v) - #endif -#endif - -#if !defined(simde_math_atanh) - #if SIMDE_MATH_BUILTIN_LIBM(atanh) - #define simde_math_atanh(v) __builtin_atanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanh(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanh(v) atanh(v) - #endif -#endif - -#if !defined(simde_math_atanhf) - #if SIMDE_MATH_BUILTIN_LIBM(atanhf) - #define simde_math_atanhf(v) __builtin_atanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanhf(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanhf(v) atanhf(v) - #endif -#endif - -#if !defined(simde_math_cbrt) - #if SIMDE_MATH_BUILTIN_LIBM(cbrt) - #define simde_math_cbrt(v) __builtin_cbrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrt(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrt(v) cbrt(v) - #endif -#endif - -#if !defined(simde_math_cbrtf) - #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) - #define simde_math_cbrtf(v) __builtin_cbrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrtf(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrtf(v) cbrtf(v) - #endif -#endif - -#if !defined(simde_math_ceil) - #if SIMDE_MATH_BUILTIN_LIBM(ceil) - #define simde_math_ceil(v) __builtin_ceil(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceil(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_ceil(v) ceil(v) - #endif -#endif - -#if !defined(simde_math_ceilf) - #if SIMDE_MATH_BUILTIN_LIBM(ceilf) - #define simde_math_ceilf(v) __builtin_ceilf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceilf(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) 
- #define simde_math_ceilf(v) ceilf(v) - #endif -#endif - -#if !defined(simde_math_copysign) - #if SIMDE_MATH_BUILTIN_LIBM(copysign) - #define simde_math_copysign(x, y) __builtin_copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysign(x, y) std::copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysign(x, y) copysign(x, y) - #endif -#endif - -#if !defined(simde_math_copysignf) - #if SIMDE_MATH_BUILTIN_LIBM(copysignf) - #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysignf(x, y) std::copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysignf(x, y) copysignf(x, y) - #endif -#endif - -#if !defined(simde_math_signbit) - #if SIMDE_MATH_BUILTIN_LIBM(signbit) - #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - #define simde_math_signbit(x) __builtin_signbit(x) - #else - #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) - #endif - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_signbit(x) std::signbit(x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_signbit(x) signbit(x) - #endif -#endif - -#if !defined(simde_math_cos) - #if SIMDE_MATH_BUILTIN_LIBM(cos) - #define simde_math_cos(v) __builtin_cos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cos(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cos(v) cos(v) - #endif -#endif - -#if !defined(simde_math_cosf) - #if defined(SIMDE_MATH_SLEEF_ENABLE) - #if SIMDE_ACCURACY_PREFERENCE < 1 - #define simde_math_cosf(v) Sleef_cosf_u35(v) - #else - #define simde_math_cosf(v) Sleef_cosf_u10(v) - #endif - #elif SIMDE_MATH_BUILTIN_LIBM(cosf) - #define simde_math_cosf(v) __builtin_cosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosf(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosf(v) cosf(v) - #endif -#endif - -#if !defined(simde_math_cosh) - #if SIMDE_MATH_BUILTIN_LIBM(cosh) - #define simde_math_cosh(v) __builtin_cosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosh(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosh(v) cosh(v) - #endif -#endif - -#if !defined(simde_math_coshf) - #if SIMDE_MATH_BUILTIN_LIBM(coshf) - #define simde_math_coshf(v) __builtin_coshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_coshf(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_coshf(v) coshf(v) - #endif -#endif - -#if !defined(simde_math_erf) - #if SIMDE_MATH_BUILTIN_LIBM(erf) - #define simde_math_erf(v) __builtin_erf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erf(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erf(v) erf(v) - #endif -#endif - -#if !defined(simde_math_erff) - #if SIMDE_MATH_BUILTIN_LIBM(erff) - #define simde_math_erff(v) __builtin_erff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erff(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erff(v) erff(v) - #endif -#endif - -#if !defined(simde_math_erfc) - #if SIMDE_MATH_BUILTIN_LIBM(erfc) - #define simde_math_erfc(v) __builtin_erfc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfc(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfc(v) erfc(v) - #endif -#endif - -#if !defined(simde_math_erfcf) - #if SIMDE_MATH_BUILTIN_LIBM(erfcf) - #define simde_math_erfcf(v) 
__builtin_erfcf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfcf(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfcf(v) erfcf(v) - #endif -#endif - -#if !defined(simde_math_exp) - #if SIMDE_MATH_BUILTIN_LIBM(exp) - #define simde_math_exp(v) __builtin_exp(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp(v) exp(v) - #endif -#endif - -#if !defined(simde_math_expf) - #if SIMDE_MATH_BUILTIN_LIBM(expf) - #define simde_math_expf(v) __builtin_expf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expf(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expf(v) expf(v) - #endif -#endif - -#if !defined(simde_math_expm1) - #if SIMDE_MATH_BUILTIN_LIBM(expm1) - #define simde_math_expm1(v) __builtin_expm1(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1(v) expm1(v) - #endif -#endif - -#if !defined(simde_math_expm1f) - #if SIMDE_MATH_BUILTIN_LIBM(expm1f) - #define simde_math_expm1f(v) __builtin_expm1f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1f(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1f(v) expm1f(v) - #endif -#endif - -#if !defined(simde_math_exp2) - #if SIMDE_MATH_BUILTIN_LIBM(exp2) - #define simde_math_exp2(v) __builtin_exp2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2(v) exp2(v) - #endif -#endif - -#if !defined(simde_math_exp2f) - #if SIMDE_MATH_BUILTIN_LIBM(exp2f) - #define simde_math_exp2f(v) __builtin_exp2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2f(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2f(v) exp2f(v) - #endif -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10(v) __builtin_exp10(v) -#else -# define simde_math_exp10(v) pow(10.0, (v)) -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10f(v) __builtin_exp10f(v) -#else -# define simde_math_exp10f(v) powf(10.0f, (v)) -#endif - -#if !defined(simde_math_fabs) - #if SIMDE_MATH_BUILTIN_LIBM(fabs) - #define simde_math_fabs(v) __builtin_fabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabs(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabs(v) fabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_floor) - #if SIMDE_MATH_BUILTIN_LIBM(floor) - #define simde_math_floor(v) __builtin_floor(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floor(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floor(v) floor(v) - #endif -#endif - -#if !defined(simde_math_floorf) - #if SIMDE_MATH_BUILTIN_LIBM(floorf) - #define simde_math_floorf(v) __builtin_floorf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floorf(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floorf(v) floorf(v) - #endif -#endif - -#if 
!defined(simde_math_fma) - #if SIMDE_MATH_BUILTIN_LIBM(fma) - #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fma(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fma(x, y, z) fma(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmaf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaf) - #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaf(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaf(x, y, z) fmaf(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmax) - #if SIMDE_MATH_BUILTIN_LIBM(fmax) - #define simde_math_fmax(x, y) __builtin_fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmax(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmax(x, y) fmax(x, y) - #endif -#endif - -#if !defined(simde_math_fmaxf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) - #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaxf(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaxf(x, y) fmaxf(x, y) - #endif -#endif - -#if !defined(simde_math_hypot) - #if SIMDE_MATH_BUILTIN_LIBM(hypot) - #define simde_math_hypot(y, x) __builtin_hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypot(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypot(y, x) hypot(y, x) - #endif -#endif - -#if !defined(simde_math_hypotf) - #if SIMDE_MATH_BUILTIN_LIBM(hypotf) - #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypotf(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypotf(y, x) hypotf(y, x) - #endif -#endif - -#if !defined(simde_math_log) - #if SIMDE_MATH_BUILTIN_LIBM(log) - #define simde_math_log(v) __builtin_log(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log(v) log(v) - #endif -#endif - -#if !defined(simde_math_logf) - #if SIMDE_MATH_BUILTIN_LIBM(logf) - #define simde_math_logf(v) __builtin_logf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logf(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logf(v) logf(v) - #endif -#endif - -#if !defined(simde_math_logb) - #if SIMDE_MATH_BUILTIN_LIBM(logb) - #define simde_math_logb(v) __builtin_logb(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logb(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logb(v) logb(v) - #endif -#endif - -#if !defined(simde_math_logbf) - #if SIMDE_MATH_BUILTIN_LIBM(logbf) - #define simde_math_logbf(v) __builtin_logbf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logbf(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logbf(v) logbf(v) - #endif -#endif - -#if !defined(simde_math_log1p) - #if SIMDE_MATH_BUILTIN_LIBM(log1p) - #define simde_math_log1p(v) __builtin_log1p(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log1p(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1p(v) log1p(v) - #endif -#endif - -#if !defined(simde_math_log1pf) - #if SIMDE_MATH_BUILTIN_LIBM(log1pf) - #define simde_math_log1pf(v) __builtin_log1pf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define 
simde_math_log1pf(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1pf(v) log1pf(v) - #endif -#endif - -#if !defined(simde_math_log2) - #if SIMDE_MATH_BUILTIN_LIBM(log2) - #define simde_math_log2(v) __builtin_log2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2(v) log2(v) - #endif -#endif - -#if !defined(simde_math_log2f) - #if SIMDE_MATH_BUILTIN_LIBM(log2f) - #define simde_math_log2f(v) __builtin_log2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2f(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2f(v) log2f(v) - #endif -#endif - -#if !defined(simde_math_log10) - #if SIMDE_MATH_BUILTIN_LIBM(log10) - #define simde_math_log10(v) __builtin_log10(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10(v) log10(v) - #endif -#endif - -#if !defined(simde_math_log10f) - #if SIMDE_MATH_BUILTIN_LIBM(log10f) - #define simde_math_log10f(v) __builtin_log10f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10f(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10f(v) log10f(v) - #endif -#endif - -#if !defined(simde_math_modf) - #if SIMDE_MATH_BUILTIN_LIBM(modf) - #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modf(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modf(x, iptr) modf(x, iptr) - #endif -#endif - -#if !defined(simde_math_modff) - #if SIMDE_MATH_BUILTIN_LIBM(modff) - #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modff(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modff(x, iptr) modff(x, iptr) - #endif -#endif - -#if !defined(simde_math_nearbyint) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) - #define simde_math_nearbyint(v) __builtin_nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyint(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyint(v) nearbyint(v) - #endif -#endif - -#if !defined(simde_math_nearbyintf) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) - #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyintf(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyintf(v) nearbyintf(v) - #endif -#endif - -#if !defined(simde_math_pow) - #if SIMDE_MATH_BUILTIN_LIBM(pow) - #define simde_math_pow(y, x) __builtin_pow(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_pow(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_pow(y, x) pow(y, x) - #endif -#endif - -#if !defined(simde_math_powf) - #if SIMDE_MATH_BUILTIN_LIBM(powf) - #define simde_math_powf(y, x) __builtin_powf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_powf(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_powf(y, x) powf(y, x) - #endif -#endif - -#if !defined(simde_math_rint) - #if SIMDE_MATH_BUILTIN_LIBM(rint) - #define simde_math_rint(v) __builtin_rint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rint(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rint(v) rint(v) - #endif 
-#endif - -#if !defined(simde_math_rintf) - #if SIMDE_MATH_BUILTIN_LIBM(rintf) - #define simde_math_rintf(v) __builtin_rintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rintf(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rintf(v) rintf(v) - #endif -#endif - -#if !defined(simde_math_round) - #if SIMDE_MATH_BUILTIN_LIBM(round) - #define simde_math_round(v) __builtin_round(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_round(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_round(v) round(v) - #endif -#endif - -#if !defined(simde_math_roundf) - #if SIMDE_MATH_BUILTIN_LIBM(roundf) - #define simde_math_roundf(v) __builtin_roundf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_roundf(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_roundf(v) roundf(v) - #endif -#endif - -#if !defined(simde_math_roundeven) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundeven(v) __builtin_roundeven(v) - #elif defined(simde_math_round) && defined(simde_math_fabs) - static HEDLEY_INLINE - double - simde_math_roundeven(double v) { - double rounded = simde_math_round(v); - double diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundeven simde_math_roundeven - #endif -#endif - -#if !defined(simde_math_roundevenf) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundevenf(v) __builtin_roundevenf(v) - #elif defined(simde_math_roundf) && defined(simde_math_fabsf) - static HEDLEY_INLINE - float - simde_math_roundevenf(float v) { - float rounded = simde_math_roundf(v); - float diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundevenf simde_math_roundevenf - #endif -#endif - -#if !defined(simde_math_sin) - #if SIMDE_MATH_BUILTIN_LIBM(sin) - #define simde_math_sin(v) __builtin_sin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sin(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sin(v) sin(v) - #endif -#endif - -#if !defined(simde_math_sinf) - #if SIMDE_MATH_BUILTIN_LIBM(sinf) - #define simde_math_sinf(v) __builtin_sinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinf(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinf(v) sinf(v) - #endif -#endif - -#if !defined(simde_math_sinh) - #if SIMDE_MATH_BUILTIN_LIBM(sinh) - #define simde_math_sinh(v) __builtin_sinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinh(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinh(v) sinh(v) - #endif -#endif - -#if !defined(simde_math_sinhf) - #if SIMDE_MATH_BUILTIN_LIBM(sinhf) - #define simde_math_sinhf(v) __builtin_sinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinhf(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinhf(v) sinhf(v) - #endif -#endif - -#if !defined(simde_math_sqrt) - #if SIMDE_MATH_BUILTIN_LIBM(sqrt) - #define simde_math_sqrt(v) __builtin_sqrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrt(v) 
std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrt(v) sqrt(v) - #endif -#endif - -#if !defined(simde_math_sqrtf) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) - #define simde_math_sqrtf(v) __builtin_sqrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtf(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtf(v) sqrtf(v) - #endif -#endif - -#if !defined(simde_math_sqrtl) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtl) - #define simde_math_sqrtl(v) __builtin_sqrtl(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtl(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtl(v) sqrtl(v) - #endif -#endif - -#if !defined(simde_math_tan) - #if SIMDE_MATH_BUILTIN_LIBM(tan) - #define simde_math_tan(v) __builtin_tan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tan(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tan(v) tan(v) - #endif -#endif - -#if !defined(simde_math_tanf) - #if SIMDE_MATH_BUILTIN_LIBM(tanf) - #define simde_math_tanf(v) __builtin_tanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanf(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanf(v) tanf(v) - #endif -#endif - -#if !defined(simde_math_tanh) - #if SIMDE_MATH_BUILTIN_LIBM(tanh) - #define simde_math_tanh(v) __builtin_tanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanh(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanh(v) tanh(v) - #endif -#endif - -#if !defined(simde_math_tanhf) - #if SIMDE_MATH_BUILTIN_LIBM(tanhf) - #define simde_math_tanhf(v) __builtin_tanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanhf(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanhf(v) tanhf(v) - #endif -#endif - -#if !defined(simde_math_trunc) - #if SIMDE_MATH_BUILTIN_LIBM(trunc) - #define simde_math_trunc(v) __builtin_trunc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_trunc(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_trunc(v) trunc(v) - #endif -#endif - -#if !defined(simde_math_truncf) - #if SIMDE_MATH_BUILTIN_LIBM(truncf) - #define simde_math_truncf(v) __builtin_truncf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_truncf(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_truncf(v) truncf(v) - #endif -#endif - -/*** Comparison macros (which don't raise invalid errors) ***/ - -#if defined(isunordered) - #define simde_math_isunordered(x, y) isunordered(x, y) -#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) - #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) -#else - static HEDLEY_INLINE - int simde_math_isunordered(double x, double y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunordered simde_math_isunordered - - static HEDLEY_INLINE - int simde_math_isunorderedf(float x, float y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunorderedf simde_math_isunorderedf -#endif -#if !defined(simde_math_isunorderedf) - #define simde_math_isunorderedf simde_math_isunordered -#endif - -/*** Additional functions not in libm ***/ - -#if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) - static HEDLEY_INLINE - double - simde_math_cdfnorm(double x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const double a1 = 0.254829592; - static const double a2 = -0.284496736; 
- static const double a3 = 1.421413741; - static const double a4 = -1.453152027; - static const double a5 = 1.061405429; - static const double p = 0.3275911; - - const int sign = x < 0; - x = simde_math_fabs(x) / simde_math_sqrt(2.0); - - /* A&S formula 7.1.26 */ - double t = 1.0 / (1.0 + p * x); - double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); - - return 0.5 * (1.0 + (sign ? -y : y)); - } - #define simde_math_cdfnorm simde_math_cdfnorm -#endif - -#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) - static HEDLEY_INLINE - float - simde_math_cdfnormf(float x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const float a1 = 0.254829592f; - static const float a2 = -0.284496736f; - static const float a3 = 1.421413741f; - static const float a4 = -1.453152027f; - static const float a5 = 1.061405429f; - static const float p = 0.3275911f; - - const int sign = x < 0; - x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); - - /* A&S formula 7.1.26 */ - float t = 1.0f / (1.0f + p * x); - float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); - - return 0.5f * (1.0f + (sign ? -y : y)); - } - #define simde_math_cdfnormf simde_math_cdfnormf -#endif - -#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) - /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ - static HEDLEY_INLINE - double - simde_math_cdfnorminv(double p) { - static const double a[6] = { - -3.969683028665376e+01, - 2.209460984245205e+02, - -2.759285104469687e+02, - 1.383577518672690e+02, - -3.066479806614716e+01, - 2.506628277459239e+00 - }; - - static const double b[5] = { - -5.447609879822406e+01, - 1.615858368580409e+02, - -1.556989798598866e+02, - 6.680131188771972e+01, - -1.328068155288572e+01 - }; - - static const double c[6] = { - -7.784894002430293e-03, - -3.223964580411365e-01, - -2.400758277161838e+00, - -2.549732539343734e+00, - 4.374664141464968e+00, - 2.938163982698783e+00 - }; - - static const double d[4] = { - 7.784695709041462e-03, - 3.224671290700398e-01, - 2.445134137142996e+00, - 3.754408661907416e+00 - }; - - static const double low = 0.02425; - static const double high = 0.97575; - double q, r; - - if (p < 0 || p > 1) { - return 0.0; - } else if (p == 0) { - return -SIMDE_MATH_INFINITY; - } else if (p == 1) { - return SIMDE_MATH_INFINITY; - } else if (p < low) { - q = simde_math_sqrt(-2.0 * simde_math_log(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } -} -#define simde_math_cdfnorminv simde_math_cdfnorminv -#endif - -#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_cdfnorminvf(float p) { - static const float a[6] = { - -3.969683028665376e+01f, - 2.209460984245205e+02f, - -2.759285104469687e+02f, - 1.383577518672690e+02f, - -3.066479806614716e+01f, - 
2.506628277459239e+00f - }; - static const float b[5] = { - -5.447609879822406e+01f, - 1.615858368580409e+02f, - -1.556989798598866e+02f, - 6.680131188771972e+01f, - -1.328068155288572e+01f - }; - static const float c[6] = { - -7.784894002430293e-03f, - -3.223964580411365e-01f, - -2.400758277161838e+00f, - -2.549732539343734e+00f, - 4.374664141464968e+00f, - 2.938163982698783e+00f - }; - static const float d[4] = { - 7.784695709041462e-03f, - 3.224671290700398e-01f, - 2.445134137142996e+00f, - 3.754408661907416e+00f - }; - static const float low = 0.02425f; - static const float high = 0.97575f; - float q, r; - - if (p < 0 || p > 1) { - return 0.0f; - } else if (p == 0) { - return -SIMDE_MATH_INFINITYF; - } else if (p == 1) { - return SIMDE_MATH_INFINITYF; - } else if (p < low) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5f; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } - } - #define simde_math_cdfnorminvf simde_math_cdfnorminvf -#endif - -#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfinv(double x) { - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c - * - * The original answer on SO uses a constant of 0.147, but in my - * testing 0.14829094707965850830078125 gives a lower average absolute error - * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). - * That said, if your goal is to minimize the *maximum* absolute - * error, 0.15449436008930206298828125 provides significantly better - * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); - } - #define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfinvf(float x) { - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); - } - #define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfcinv(double x) { - if(x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - static const double p[6] = { - 0.1550470003116, - 1.382719649631, - 0.690969348887, - -1.128081391617, - 0.680544246825, - -0.16444156791 - }; - static const double q[3] = { - 0.155024849822, - 1.385228141995, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - static const double p[4] = { - 0.00980456202915, - 0.363667889171, - 0.97302949837, - -0.5374947401 - }; - static const double q[3] = { - 0.00980451277802, - 0.363699971544, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } - } - - #define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfcinvf(float x) { - if(x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = { - 0.1550470003116f, - 1.382719649631f, - 0.690969348887f, - -1.128081391617f, - 0.680544246825f - -0.164441567910f - }; - static const float q[3] = { - 0.155024849822f, - 1.385228141995f, - 1.000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = { - 0.00980456202915f, - 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f - }; - static const float q[3] = { - 0.00980451277802f, - 0.36369997154400f, - 1.00000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; - } - } - - #define simde_math_erfcinvf simde_math_erfcinvf -#endif - -static HEDLEY_INLINE -double -simde_math_rad2deg(double radians) { - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE -float -simde_math_rad2degf(float radians) { - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE -double -simde_math_deg2rad(double degrees) { - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE -float -simde_math_deg2radf(float degrees) { - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE -int8_t -simde_math_adds_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); - #else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_adds_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_adds_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_adds_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_adds_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); - #else - uint8_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_adds_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); - #else - uint16_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_adds_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); - #else - uint32_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_adds_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); - #else - uint64_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -int8_t -simde_math_subs_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); - #else - uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - case 32: result = func_name(__VA_ARGS__, 32); break; \ - case 33: result = func_name(__VA_ARGS__, 33); break; \ - case 34: result = func_name(__VA_ARGS__, 34); break; \ - case 35: result = func_name(__VA_ARGS__, 35); break; \ - case 36: result = func_name(__VA_ARGS__, 36); break; \ - case 37: result = func_name(__VA_ARGS__, 37); break; \ - case 38: result = func_name(__VA_ARGS__, 38); break; \ - case 39: result = func_name(__VA_ARGS__, 39); break; \ - case 40: result = func_name(__VA_ARGS__, 40); break; \ - case 41: result = func_name(__VA_ARGS__, 41); break; \ - case 42: result = func_name(__VA_ARGS__, 42); break; \ - case 43: result = func_name(__VA_ARGS__, 43); break; \ - case 44: result = func_name(__VA_ARGS__, 44); break; \ - case 45: result = func_name(__VA_ARGS__, 45); break; \ - case 46: result = func_name(__VA_ARGS__, 46); break; \ - case 47: result = func_name(__VA_ARGS__, 47); break; \ - case 48: result = func_name(__VA_ARGS__, 48); break; \ - case 49: result = func_name(__VA_ARGS__, 49); break; \ - case 50: result = func_name(__VA_ARGS__, 50); break; \ - case 51: result = func_name(__VA_ARGS__, 51); break; \ - case 52: result = func_name(__VA_ARGS__, 52); break; \ - case 53: result = func_name(__VA_ARGS__, 53); break; \ - case 54: result = func_name(__VA_ARGS__, 54); break; \ - case 55: result = func_name(__VA_ARGS__, 55); break; \ - case 56: result = func_name(__VA_ARGS__, 56); break; \ - case 57: result = func_name(__VA_ARGS__, 57); break; \ - case 58: result = func_name(__VA_ARGS__, 58); break; \ - case 59: result = func_name(__VA_ARGS__, 59); break; \ - case 60: result = func_name(__VA_ARGS__, 60); break; \ - case 61: result = func_name(__VA_ARGS__, 61); break; \ - case 62: 
result = func_name(__VA_ARGS__, 62); break; \ - case 63: result = func_name(__VA_ARGS__, 63); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - case 32: func_name(__VA_ARGS__, 32); break; \ - case 33: func_name(__VA_ARGS__, 33); break; \ - case 34: func_name(__VA_ARGS__, 34); break; \ - case 35: func_name(__VA_ARGS__, 35); break; \ - case 36: func_name(__VA_ARGS__, 36); break; \ - case 37: func_name(__VA_ARGS__, 37); break; \ - case 38: func_name(__VA_ARGS__, 38); break; \ 
- case 39: func_name(__VA_ARGS__, 39); break; \ - case 40: func_name(__VA_ARGS__, 40); break; \ - case 41: func_name(__VA_ARGS__, 41); break; \ - case 42: func_name(__VA_ARGS__, 42); break; \ - case 43: func_name(__VA_ARGS__, 43); break; \ - case 44: func_name(__VA_ARGS__, 44); break; \ - case 45: func_name(__VA_ARGS__, 45); break; \ - case 46: func_name(__VA_ARGS__, 46); break; \ - case 47: func_name(__VA_ARGS__, 47); break; \ - case 48: func_name(__VA_ARGS__, 48); break; \ - case 49: func_name(__VA_ARGS__, 49); break; \ - case 50: func_name(__VA_ARGS__, 50); break; \ - case 51: func_name(__VA_ARGS__, 51); break; \ - case 52: func_name(__VA_ARGS__, 52); break; \ - case 53: func_name(__VA_ARGS__, 53); break; \ - case 54: func_name(__VA_ARGS__, 54); break; \ - case 55: func_name(__VA_ARGS__, 55); break; \ - case 56: func_name(__VA_ARGS__, 56); break; \ - case 57: func_name(__VA_ARGS__, 57); break; \ - case 58: func_name(__VA_ARGS__, 58); break; \ - case 59: func_name(__VA_ARGS__, 59); break; \ - case 60: func_name(__VA_ARGS__, 60); break; \ - case 61: func_name(__VA_ARGS__, 61); break; \ - case 62: func_name(__VA_ARGS__, 62); break; \ - case 63: func_name(__VA_ARGS__, 63); break; \ - default: default_case; break; \ - } \ - } while (0) - -HEDLEY_DIAGNOSTIC_POP - -#endif -/* :: End simde/simde-constify.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-align.h :: */ -/* Alignment - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - ********************************************************************** - * - * This is portability layer which should help iron out some - * differences across various compilers, as well as various verisons of - * C and C++. - * - * It was originally developed for SIMD Everywhere - * (), but since its only - * dependency is Hedley (, also CC0) - * it can easily be used in other projects, so please feel free to do - * so. - * - * If you do use this in your project, please keep a link to SIMDe in - * your code to remind you where to report any bugs and/or check for - * updated versions. - * - * # API Overview - * - * The API has several parts, and most macros have a few variations. - * There are APIs for declaring aligned fields/variables, optimization - * hints, and run-time alignment checks. - * - * Briefly, macros ending with "_TO" take numeric values and are great - * when you know the value you would like to use. Macros ending with - * "_LIKE", on the other hand, accept a type and are used when you want - * to use the alignment of a type instead of hardcoding a value. - * - * Documentation for each section of the API is inline. - * - * True to form, MSVC is the main problem and imposes several - * limitations on the effectiveness of the APIs. Detailed descriptions - * of the limitations of each macro are inline, but in general: - * - * * On C11+ or C++11+ code written using this API will work. The - * ASSUME macros may or may not generate a hint to the compiler, but - * that is only an optimization issue and will not actually cause - * failures. - * * If you're using pretty much any compiler other than MSVC, - * everything should basically work as well as in C11/C++11. 
- */ - -#if !defined(SIMDE_ALIGN_H) -#define SIMDE_ALIGN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* I know this seems a little silly, but some non-hosted compilers - * don't have stddef.h, so we try to accomodate them. */ -#if !defined(SIMDE_ALIGN_SIZE_T_) - #if defined(__SIZE_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__SIZE_T_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #else - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #endif -#endif - -#if !defined(SIMDE_ALIGN_INTPTR_T_) - #if defined(__INTPTR_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ - #elif defined(__PTRDIFF_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ - #elif defined(__PTRDIFF_T_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #else - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #endif -#endif - -#if defined(SIMDE_ALIGN_DEBUG) - #if defined(__cplusplus) - #include - #else - #include - #endif -#endif - -/* SIMDE_ALIGN_OF(Type) - * - * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or - * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. - * It isn't defined everywhere (only when the compiler has some alignof- - * like feature we can use to implement it), but it should work in most - * modern compilers, as well as C11 and C++11. - * - * If we can't find an implementation for SIMDE_ALIGN_OF then the macro - * will not be defined, so if you can handle that situation sensibly - * you may need to sprinkle some ifdefs into your code. - */ -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (0 && HEDLEY_HAS_FEATURE(c_alignof)) - #define SIMDE_ALIGN_OF(Type) _Alignof(Type) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) - #define SIMDE_ALIGN_OF(Type) alignof(Type) -#elif \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - defined(__IBM__ALIGNOF__) || \ - defined(__clang__) - #define SIMDE_ALIGN_OF(Type) __alignof__(Type) -#elif \ - HEDLEY_IAR_VERSION_CHECK(8,40,0) - #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(19,0,0) - /* Probably goes back much further, but MS takes down their old docs. - * If you can verify that this works in earlier versions please let - * me know! */ - #define SIMDE_ALIGN_OF(Type) __alignof(Type) -#endif - -/* SIMDE_ALIGN_MAXIMUM: - * - * This is the maximum alignment that the compiler supports. You can - * define the value prior to including SIMDe if necessary, but in that - * case *please* submit an issue so we can add the platform to the - * detection code. - * - * Most compilers are okay with types which are aligned beyond what - * they think is the maximum, as long as the alignment is a power - * of two. 
Older versions of MSVC is the exception, so we need to cap - * the alignment requests at values that the implementation supports. - * - * XL C/C++ will accept values larger than 16 (which is the alignment - * of an AltiVec vector), but will not reliably align to the larger - * value, so so we cap the value at 16 there. - * - * If the compiler accepts any power-of-two value within reason then - * this macro should be left undefined, and the SIMDE_ALIGN_CAP - * macro will just return the value passed to it. */ -#if !defined(SIMDE_ALIGN_MAXIMUM) - #if defined(HEDLEY_MSVC_VERSION) - #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) - // Visual studio 2017 and newer does not need a max - #else - #if defined(_M_IX86) || defined(_M_AMD64) - #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 - #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) - /* VS 2010 is really a guess based on Wikipedia; if anyone can - * test with old VS versions I'd really appreciate it. */ - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 - #else - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif - #elif defined(_M_ARM) || defined(_M_ARM64) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 - #endif - #endif - #elif defined(HEDLEY_IBM_VERSION) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif -#endif - -/* You can mostly ignore these; they're intended for internal use. - * If you do need to use them please let me know; if they fulfill - * a common use case I'll probably drop the trailing underscore - * and make them part of the public API. */ -#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) - #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 - #define SIMDE_ALIGN_64_ 32 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 - #define SIMDE_ALIGN_64_ 16 - #define SIMDE_ALIGN_32_ 16 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 - #define SIMDE_ALIGN_64_ 8 - #define SIMDE_ALIGN_32_ 8 - #define SIMDE_ALIGN_16_ 8 - #define SIMDE_ALIGN_8_ 8 - #else - #error Max alignment expected to be >= 8 - #endif -#else - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 -#endif - -/** - * SIMDE_ALIGN_CAP(Alignment) - * - * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. - */ -#if defined(SIMDE_ALIGN_MAXIMUM) - #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) -#else - #define SIMDE_ALIGN_CAP(Alignment) (Alignment) -#endif - -/* SIMDE_ALIGN_TO(Alignment) - * - * SIMDE_ALIGN_TO is used to declare types or variables. It basically - * maps to the align attribute in most compilers, the align declspec - * in MSVC, or _Alignas/alignas in C11/C++11. - * - * Example: - * - * struct i32x4 { - * SIMDE_ALIGN_TO(16) int32_t values[4]; - * } - * - * Limitations: - * - * MSVC requires that the Alignment parameter be numeric; you can't do - * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is - * unfortunate because that's really how the LIKE macros are - * implemented, and I am not aware of a way to get anything like this - * to work without using the C11/C++11 keywords. 
- * - * It also means that we can't use SIMDE_ALIGN_CAP to limit the - * alignment to the value specified, which MSVC also requires, so on - * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. - * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, - * but should be safe to use on MSVC. - * - * All this is to say that, if you want your code to work on MSVC, you - * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of - * SIMDE_ALIGN_TO(8/16/32/64). - */ -#if \ - HEDLEY_HAS_ATTRIBUTE(aligned) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) -#elif \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) - #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) - #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) - /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); - * the alignment passed to the declspec has to be an integer. */ - #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE -#endif -#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) -#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) -#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) -#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) - -/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) - * - * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's - * std::assume_aligned, or __builtin_assume_aligned. It tells the - * compiler to assume that the provided pointer is aligned to an - * `Alignment`-byte boundary. - * - * If you define SIMDE_ALIGN_DEBUG prior to including this header then - * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't - * integrate with NDEBUG in this header, but it may be a good idea to - * put something like this in your code: - * - * #if !defined(NDEBUG) - * #define SIMDE_ALIGN_DEBUG - * #endif - * #include <.../simde-align.h> - */ -#if \ - HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ - HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ - __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ - __assume_aligned(simde_assume_aligned_t_, Alignment); \ - simde_assume_aligned_t_; \ - })) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) -#else - #if defined(__cplusplus) - template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) - #else - HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) - #endif - { - HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); - return ptr; - } - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) - #else - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) - #endif -#endif - -#if !defined(SIMDE_ALIGN_DEBUG) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) -#else - #include - #if defined(__cplusplus) - template - static HEDLEY_ALWAYS_INLINE - T* - simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #else - static HEDLEY_ALWAYS_INLINE - void* - simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #endif - { - if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { - fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", - file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), - HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), - HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); - } - - return ptr; - } - - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) - #else - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) - #endif -#endif - -/* SIMDE_ALIGN_LIKE(Type) - * SIMDE_ALIGN_LIKE_#(Type) - * - * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros - * except instead of an integer they take a type; basically, it's just - * a more convenient way to do something like: - * - * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - * - * The versions with a numeric suffix will fall back 
on using a numeric - * value in the event we can't use SIMDE_ALIGN_OF(Type). This is - * mainly for MSVC, where __declspec(align()) can't handle anything - * other than hard-coded numeric values. - */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) - #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) -#else - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 -#endif - -/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) - * - * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a - * type instead of a numeric value. */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) - #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) -#endif - -/* SIMDE_ALIGN_CAST(Type, Pointer) - * - * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try - * to silence warnings that some compilers may produce if you try - * to assign to a type with increased alignment requirements. - * - * Note that it does *not* actually attempt to tell the compiler that - * the pointer is aligned like the destination should be; that's the - * job of the next macro. This macro is necessary for stupid APIs - * like _mm_loadu_si128 where the input is a __m128i* but the function - * is specifically for data which isn't necessarily aligned to - * _Alignof(__m128i). - */ -#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ - Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_r_; \ - })) -#else - #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) -#endif - -/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) - * - * This is sort of like a combination of a reinterpret_cast and a - * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell - * the compiler that the pointer is aligned like the specified type - * and casts the pointer to the specified type while suppressing any - * warnings from the compiler about casting to a type with greater - * alignment requirements. - */ -#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) - -#endif /* !defined(SIMDE_ALIGN_H) */ -/* :: End simde/simde-align.h :: */ - -/* In some situations, SIMDe has to make large performance sacrifices - * for small increases in how faithfully it reproduces an API, but - * only a relatively small number of users will actually need the API - * to be completely accurate. The SIMDE_FAST_* options can be used to - * disable these trade-offs. - * - * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or - * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to - * enable some optimizations. Using -ffast-math and/or - * -ffinite-math-only will also enable the relevant options. If you - * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
- */
-
-/* Most programs avoid NaNs by never passing values which can result in
- * a NaN; for example, if you only pass non-negative values to the sqrt
- * functions, it won't generate a NaN. On some platforms, similar
- * functions handle NaNs differently; for example, the _mm_min_ps SSE
- * function will return 0.0 if you pass it (0.0, NaN), but the NEON
- * vminq_f32 function will return NaN. Making them behave like one
- * another is expensive; it requires generating a mask of all lanes
- * with NaNs, then performing the operation (e.g., vminq_f32), then
- * blending together the result with another vector using the mask.
- *
- * If you don't want SIMDe to worry about the differences between how
- * NaNs are handled on the two platforms, define this (or pass
- * -ffinite-math-only) */
-#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__)
-  #define SIMDE_FAST_MATH
-#endif
-
-#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS)
-  #if defined(SIMDE_FAST_MATH)
-    #define SIMDE_FAST_NANS
-  #elif defined(__FINITE_MATH_ONLY__)
-    #if __FINITE_MATH_ONLY__
-      #define SIMDE_FAST_NANS
-    #endif
-  #endif
-#endif
-
-/* Many functions are defined as using the current rounding mode
- * (i.e., the SIMD version of fegetround()) when converting to
- * an integer. For example, _mm_cvtpd_epi32. Unfortunately,
- * on some platforms (such as ARMv8+ where round-to-nearest is
- * always used, regardless of the FPSCR register) this means we
- * have to first query the current rounding mode, then choose
- * the proper function (round, ceil, floor, etc.) */
-#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH)
-  #define SIMDE_FAST_ROUND_MODE
-#endif
-
-/* This controls how ties are rounded. For example, does 10.5 round to
- * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for
- * example) doesn't support it and it must be emulated (which is rather
- * slow). If you're okay with just using the default for whatever arch
- * you're on, you should definitely define this.
- *
- * Note that we don't use this macro to avoid correct implementations
- * in functions which are explicitly about rounding (such as vrnd* on
- * NEON, _mm_round_* on x86, etc.); it is only used for code where
- * rounding is a component in another function, and even then it isn't
- * usually a problem since such functions will use the current rounding
- * mode. */
-#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH)
-  #define SIMDE_FAST_ROUND_TIES
-#endif
-
-/* For functions which convert from one type to another (mostly from
- * floating point to integer types), sometimes we need to do a range
- * check and potentially return a different result if the value
- * falls outside that range. Skipping this check can provide a
- * performance boost, at the expense of faithfulness to the API we're
- * emulating. */
-#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH)
-  #define SIMDE_FAST_CONVERSION_RANGE
-#endif
-
-/* Due to differences across platforms, sometimes it can be much
- * faster for us to allow spurious floating point exceptions,
- * or to not generate them when we should.
*/ -#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_EXCEPTIONS -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) - #if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ - (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) - #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") - #else - #define SIMDE_REQUIRE_CONSTANT(arg) - #endif -#else - #define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) - /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which - * starts with a double-underscore. This is a system header so we have no - * control over it, but since it's a macro it will emit a diagnostic which - * prevents compilation with -Werror. */ - #if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ - _Static_assert(expr, message); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) - #endif -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) - #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#endif - -/* Statement exprs */ -#if \ - HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ - HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) -#endif - -/* This is just a convenience macro to make it easy to call a single - * function with a specific diagnostic disabled. 
*/ -#if defined(SIMDE_STATEMENT_EXPR_) - #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ - SIMDE_STATEMENT_EXPR_(({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - diagnostic \ - (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#endif - -#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) - #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") -#endif - -#if \ - (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) -# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -# define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -# if \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SCALAR -# define SIMDE_VECTOR_SUBSCRIPT -# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -# define SIMDE_VECTOR_SUBSCRIPT -# elif \ - HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# elif HEDLEY_HAS_ATTRIBUTE(vector_size) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SUBSCRIPT -# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) -# define SIMDE_VECTOR_SCALAR -# endif -# endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) - HEDLEY_DIAGNOSTIC_PUSH - /* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -# endif -# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif - -# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#else -# define SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_SAFELEN(l) -# define SIMDE_VECTORIZE_REDUCTION(r) -# define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if defined(SIMDE_NO_INLINE) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#elif defined(SIMDE_CONSTRAINED_COMPILATION) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static -#else -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if \ - HEDLEY_HAS_ATTRIBUTE(unused) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ - -#if defined(_MSC_VER) -# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -# define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -# define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -# define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -# define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# elif defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__s390x__) || defined(__zarch__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. 
*/ -# elif defined(_WIN32) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(sun) || defined(__sun) /* Solaris */ -# include -# if defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__APPLE__) -# include -# if defined(__LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -# include -# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -# include -# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# endif -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) - #define simde_bswap64(v) _byteswap_uint64(v) -#else - SIMDE_FUNCTION_ATTRIBUTES - uint64_t - simde_bswap64(uint64_t v) { - return - ((v & (((uint64_t) 0xff) << 56)) >> 56) | - ((v & (((uint64_t) 0xff) << 48)) >> 40) | - ((v & (((uint64_t) 0xff) << 40)) >> 24) | - ((v & (((uint64_t) 0xff) << 32)) >> 8) | - ((v & (((uint64_t) 0xff) << 24)) << 8) | - ((v & (((uint64_t) 0xff) << 16)) << 24) | - ((v & (((uint64_t) 0xff) << 8)) << 40) | - ((v & (((uint64_t) 0xff) )) << 56); - } -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -# error Unknown byte order; please file a bug -#else -# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -# define simde_endian_bswap64_be(value) simde_bswap64(value) -# define simde_endian_bswap64_le(value) (value) -# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -# define simde_endian_bswap64_be(value) (value) -# define simde_endian_bswap64_le(value) simde_bswap64(value) -# endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. 
*/ - -#if !defined(SIMDE_FLOAT32_TYPE) -# define SIMDE_FLOAT32_TYPE float -# define SIMDE_FLOAT32_C(value) value##f -#else -# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -# define SIMDE_FLOAT64_TYPE double -# define SIMDE_FLOAT64_C(value) value -#else -# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if defined(SIMDE_POLY8_TYPE) -# undef SIMDE_POLY8_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY8_TYPE poly8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value)) -#else -# define SIMDE_POLY8_TYPE uint8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value)) -#endif -typedef SIMDE_POLY8_TYPE simde_poly8; - -#if defined(SIMDE_POLY16_TYPE) -# undef SIMDE_POLY16_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY16_TYPE poly16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value)) -#else -# define SIMDE_POLY16_TYPE uint16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value)) -#endif -typedef SIMDE_POLY16_TYPE simde_poly16; - -#if defined(SIMDE_POLY64_TYPE) -# undef SIMDE_POLY64_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_POLY64_TYPE poly64_t -# define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull)) -#else -# define SIMDE_POLY64_TYPE uint64_t -# define SIMDE_POLY64_C(value) value ## ull -#endif -typedef SIMDE_POLY64_TYPE simde_poly64; - -#if defined(SIMDE_POLY128_TYPE) -# undef SIMDE_POLY128_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) -# define SIMDE_POLY128_TYPE poly128_t -# define SIMDE_POLY128_C(value) value -#elif defined(__SIZEOF_INT128__) -# define SIMDE_POLY128_TYPE __int128 -# define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value)) -#else -# define SIMDE_POLY128_TYPE uint64_t -# define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1 -#endif -typedef SIMDE_POLY128_TYPE simde_poly128; - -#if defined(__cplusplus) - typedef bool simde_bool; -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - typedef _Bool simde_bool; -#elif defined(bool) - typedef bool simde_bool; -#else - #include - typedef bool simde_bool; -#endif - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -# define SIMDE_CONVERT_FTOI(T,v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) \ - HEDLEY_DIAGNOSTIC_POP -#else -# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) -#else - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -# define 
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -# define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -# define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -# if \ - defined(HEDLEY_PGI_VERSION) || \ - defined(HEDLEY_MSVC_VERSION) -# define SIMDE_STDC_HOSTED 1 -# else -# define SIMDE_STDC_HOSTED 0 -# endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) - #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) - #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) - #endif -#endif -#if !defined(simde_memset) - #if HEDLEY_HAS_BUILTIN(__builtin_memset) - #define simde_memset(s, c, n) __builtin_memset(s, c, n) - #endif -#endif -#if !defined(simde_memcmp) - #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) - #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) - #endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) - #if !defined(SIMDE_NO_STRING_H) - #if defined(__has_include) - #if !__has_include() - #define SIMDE_NO_STRING_H - #endif - #elif (SIMDE_STDC_HOSTED == 0) - #define SIMDE_NO_STRING_H - #endif - #endif - - #if !defined(SIMDE_NO_STRING_H) - #include - #if !defined(simde_memcpy) - #define simde_memcpy(dest, src, n) memcpy(dest, src, n) - #endif - #if !defined(simde_memset) - #define simde_memset(s, c, n) memset(s, c, n) - #endif - #if !defined(simde_memcmp) - #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) - #endif - #else - /* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. 
- */
-    #if !defined(simde_memcpy)
-      SIMDE_FUNCTION_ATTRIBUTES
-      void
-      simde_memcpy_(void* dest, const void* src, size_t len) {
-        char* dest_ = HEDLEY_STATIC_CAST(char*, dest);
-        const char* src_ = HEDLEY_STATIC_CAST(const char*, src);
-        for (size_t i = 0 ; i < len ; i++) {
-          dest_[i] = src_[i];
-        }
-      }
-      #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n)
-    #endif
-
-    #if !defined(simde_memset)
-      SIMDE_FUNCTION_ATTRIBUTES
-      void
-      simde_memset_(void* s, int c, size_t len) {
-        char* s_ = HEDLEY_STATIC_CAST(char*, s);
-        char c_ = HEDLEY_STATIC_CAST(char, c);
-        for (size_t i = 0 ; i < len ; i++) {
-          s_[i] = c_;
-        }
-      }
-      #define simde_memset(s, c, n) simde_memset_(s, c, n)
-    #endif
-
-    #if !defined(simde_memcmp)
-      SIMDE_FUNCTION_ATTRIBUTES
-      int
-      simde_memcmp_(const void *s1, const void *s2, size_t n) {
-        const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1);
-        const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2);
-        for (size_t i = 0 ; i < n ; i++) {
-          if (s1_[i] != s2_[i]) {
-            return (int) (s1_[i] - s2_[i]);
-          }
-        }
-        return 0;
-      }
-      #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n)
-    #endif
-  #endif
-#endif
-
-/*** Functions that quiet a signaling NaN ***/
-
-static HEDLEY_INLINE
-double
-simde_math_quiet(double x) {
-  uint64_t tmp, mask;
-  if (!simde_math_isnan(x)) {
-    return x;
-  }
-  simde_memcpy(&tmp, &x, 8);
-  mask = 0x7ff80000;
-  mask <<= 32;
-  tmp |= mask;
-  simde_memcpy(&x, &tmp, 8);
-  return x;
-}
-
-static HEDLEY_INLINE
-float
-simde_math_quietf(float x) {
-  uint32_t tmp;
-  if (!simde_math_isnanf(x)) {
-    return x;
-  }
-  simde_memcpy(&tmp, &x, 4);
-  tmp |= 0x7fc00000lu;
-  simde_memcpy(&x, &tmp, 4);
-  return x;
-}
-
-#if defined(FE_ALL_EXCEPT)
-  #define SIMDE_HAVE_FENV_H
-#elif defined(__has_include)
-  #if __has_include(<fenv.h>)
-    #include <fenv.h>
-    #define SIMDE_HAVE_FENV_H
-  #endif
-#elif SIMDE_STDC_HOSTED == 1
-  #include <fenv.h>
-  #define SIMDE_HAVE_FENV_H
-#endif
-
-#if defined(EXIT_FAILURE)
-  #define SIMDE_HAVE_STDLIB_H
-#elif defined(__has_include)
-  #if __has_include(<stdlib.h>)
-    #include <stdlib.h>
-    #define SIMDE_HAVE_STDLIB_H
-  #endif
-#elif SIMDE_STDC_HOSTED == 1
-  #include <stdlib.h>
-  #define SIMDE_HAVE_STDLIB_H
-#endif
-
-#if defined(__has_include)
-# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include()
-# include
-# elif __has_include()
-# include
-# endif
-# if __has_include()
-# include
-# endif
-#elif SIMDE_STDC_HOSTED == 1
-# include
-# include
-#endif
-
-#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \
-  static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \
-  T_To \
-  Name (T_From value) { \
-    T_To r; \
-    simde_memcpy(&r, &value, sizeof(r)); \
-    return r; \
-  }
-
-SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32)
-SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t)
-SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64)
-SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t)
-
-/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
-/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */
-/* :: Begin simde/check.h :: */
-/* Check (assertions)
- * Portable Snippets - https://github.com/nemequ/portable-snippets
- * Created by Evan Nemerson
- *
- * To the extent possible under law, the authors have waived all
- * copyright and related or neighboring rights to this code.
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -#if !defined(_WIN32) -# define SIMDE_SIZE_MODIFIER "z" -# define SIMDE_CHAR_MODIFIER "hh" -# define SIMDE_SHORT_MODIFIER "h" -#else -# if defined(_M_X64) || defined(__amd64__) -# define SIMDE_SIZE_MODIFIER "I64" -# else -# define SIMDE_SIZE_MODIFIER "" -# endif -# define SIMDE_CHAR_MODIFIER "" -# define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) -# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ -# define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -# if defined(__has_include) -# if __has_include() -# include -# endif -# elif defined(SIMDE_STDC_HOSTED) -# if SIMDE_STDC_HOSTED == 1 -# include -# endif -# elif defined(__STDC_HOSTED__) -# if __STDC_HOSTETD__ == 1 -# include -# endif -# endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/debug-trap.h :: */ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define simde_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define simde_trap() __debugbreak() -# endif -#endif -#if !defined(simde_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define simde_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define simde_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define simde_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void simde_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define simde_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define simde_trap() raise(SIGTRAP) -# else -# define simde_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -# define simde_dbg_assert(expr) do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -# define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ -/* :: End simde/debug-trap.h :: */ - - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -# if defined(EOF) -# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) -# else -# define simde_errorf(format, ...) (simde_trap()) -# endif - HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -# if defined(SIMDE_CHECK_FAIL_DEFINED) -# define simde_assert(expr) -# else -# if defined(HEDLEY_ASSUME) -# define simde_assert(expr) HEDLEY_ASSUME(expr) -# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) -# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) -# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) -# define simde_assert(expr) __assume(expr) -# else -# define simde_assert(expr) -# endif -# endif -# define simde_assert_true(expr) simde_assert(expr) -# define simde_assert_false(expr) simde_assert(!(expr)) -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) -# define simde_assert_double_equal(a, b, precision) -# define simde_assert_string_equal(a, b) -# define simde_assert_string_not_equal(a, b) -# define simde_assert_memory_equal(size, a, b) -# define simde_assert_memory_not_equal(size, a, b) -#else -# define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ - -(simde_tmp_a_ - simde_tmp_b_) : \ - (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# include -# define simde_assert_string_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ - simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_string_not_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ - simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ - simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) \ - simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) \ - simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) \ - simde_assert_type(float, "f", a, op, b) 
-#define simde_assert_double(a, op, b) \ - simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void*, "p", a, op, b) - -#define simde_assert_int8(a, op, b) \ - simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) \ - simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) \ - simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) \ - simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ -/* :: End simde/check.h :: */ - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether the operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * we use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long lonsg are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long. 
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. 
- * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - #include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; - #endif - - #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; - #endif - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; - #endif - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; - #endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde__m64_from_private(simde__m64_private v) { - simde__m64 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64_private -simde__m64_to_private(simde__m64 v) { - simde__m64_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ - SIMDE_FUNCTION_ATTRIBUTES \ - simde__##simde_type \ - simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ - simde__##simde_type##_private r_; \ - r_.isax##_##fragment = value; \ - return simde__##simde_type##_from_private(r_); \ - } \ - \ - SIMDE_FUNCTION_ATTRIBUTES \ - source_type \ - simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ - simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ - return r_.isax##_##fragment; \ - } - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) -#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) -# define _m_paddb(a, b) simde_m_paddb(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_add_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) -# define _m_paddw(a, b) simde_mm_add_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) -# define _m_paddd(a, b) simde_mm_add_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { - r_.i8[i] = INT8_MAX; - } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { - r_.i8[i] = INT8_MIN; - } else { - r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) -# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, 
b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) -# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_and_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - r_.i64[0] = a_.i64[0] & b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -# define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = 
simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) -# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) -# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) -# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) -# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) -# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) -# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtm64_si64 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtm64_si64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i64[0]; - #endif - #endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -# define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi32_si64 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[2] = { a, 0 }; - r_.neon_i32 = vld1_s32(av); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -# define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi64_m64 (int64_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtsi64_m64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); - #else - r_.i64[0] = a; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi64_si32 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_empty (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); - #else - /* noop */ - #endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_empty() simde_mm_empty() -# define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_or_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - r_.i64[0] = a_.i64[0] | b_.i64[0]; 
- #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -# define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to 
[The remainder of this hunk deletes the vendored SIMDE headers in full; nothing in the package references them once the SSE intrinsics are gone. The removed content, abridged here, comprises:

 - src/simde/x86/mmx.h (rest of file) -- the remaining MMX emulation layer: the _mm_set*/_mm_set1*/_mm_setr*/_mm_setzero constructors, 64-bit load/store helpers, the shift family (sll/slli, srl/srli, sra/srai in immediate and register-count forms), wrapping and saturating subtract, unpackhi/unpacklo, xor_si64, and _m_to_int, each implemented with native x86 MMX, NEON, Loongson MMI, and portable scalar fallbacks.
 - src/simde/simde-f16.h (entire file) -- portable half-precision float support: selection of a simde_float16 representation (_Float16, __fp16 with or without an ABI, or a uint16_t struct), quiet-NaN and infinity constants, bit-level float16<->float32 conversion based on the CC0 technique by Fabian Giesen, and fpclassify-style helpers.
 - src/simde/x86/sse.h (beginning of file) -- the simde__m128_private union and simde__m128 type with their NEON/MSA/WASM/AltiVec/LSX views and conversion functions, plus the MXCSR rounding-mode, floating-point-exception, and flush-to-zero macros and the SIMDE_MM_GET_ROUNDING_MODE / SIMDE_MM_SET_ROUNDING_MODE shims built on <fenv.h>.]
-SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. 
*/ - #if \ - defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_f32 = vrndiq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndnq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndmq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); - #elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndpq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); - #elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndq_f32(a_.neon_f32); 
- #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); - #elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) -#else - #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps(e3, e2, e1, e0); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; - r_.neon_f32 = vld1q_f32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps1 (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps1(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - (void) a; - return vec_splats(a); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - return (simde__m128)__lsx_vldrepl_w(&a, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_splat(a); - #else - return simde_mm_set_ps(a, a, a, a); - #endif -} -#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps1(a) simde_mm_set_ps1(a) -# define _mm_set1_ps(a) simde_mm_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_move_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_move_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; - r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); - #else - r_.f32[0] = b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_broadcastlow_ps(simde__m128 a) { - /* This function broadcasts the first element in the inpu vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_ss functions since there may be garbage in the upper lanes. */ - - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_shuffle_ps(a, a, 0); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - // the upper values in the result must be the remnants of . 
- r_.neon_f32 = vaddq_f32(a_.neon_f32, value); - #else - r_.f32[0] = a_.f32[0] + b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_and_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_and_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_andnot_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_xor_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_xor_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_or_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_or_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_not_ps(simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* Note: we use ints instead of floats because we don't want cmpeq - * to return false for (NaN, NaN) */ - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - mask_ = simde__m128_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint32_t wa SIMDE_VECTOR(16); - uint32_t wb SIMDE_VECTOR(16); - uint32_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) -# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint16_t wa SIMDE_VECTOR(16); - uint16_t wb SIMDE_VECTOR(16); - uint16_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) -# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_abs_ps(simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - simde_float32 mask_; - uint32_t u32_ = UINT32_C(0x7FFFFFFF); - simde_memcpy(&mask_, &u32_, sizeof(u32_)); - return _mm_and_ps(_mm_set1_ps(mask_), a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vabsq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_abs(a_.altivec_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpge_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpge_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpgt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpgt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vandq_u32(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpunord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpunord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] == b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] >= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] > b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] <= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] < b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] != b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { - simde__m128_private - r_, - dest_ = simde__m128_to_private(dest), - src_ = simde__m128_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); - r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t sign_pos = wasm_f32x4_splat(-0.0f); - r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); - #else - r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); - r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; - r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); - #elif defined(SIMDE_IEEE754_STORAGE) - (void) src_; - (void) dest_; - simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); - r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { - return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_pi2ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); - #else - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_si2ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - r_.i32[1] = a_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvt_ss2si (simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_ss2si(a); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); - #else - simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.i16[i]; - r_.f32[i] = v; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32x2_ps(a, b); - #else - simde__m128_private r_; - simde__m64_private - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); - SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); - #else - r_.f32[0] = (simde_float32) a_.i32[0]; - r_.f32[1] = (simde_float32) a_.i32[1]; - r_.f32[2] = (simde_float32) b_.i32[0]; - r_.f32[3] = (simde_float32) b_.i32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_cvtpi8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); - r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); - r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi16 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi16(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi32(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi8 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi8(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) - /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to - * i16, combine with an all-zero vector of i16 (which will become the upper - * half), narrow to i8. 
*/ - float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); - float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); - float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); - r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) - r_.i8[i] = INT8_MAX; - else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) - r_.i8[i] = INT8_MIN; - else - r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); - } - /* Note: the upper half is undefined */ - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.u16[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtsi32_ss(a, b); - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_ss(a, b); - #else - return _mm_cvtsi64x_ss(a, b); - #endif - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - 
return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm_cvtss_f32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtss_f32(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_f32(a_.neon_f32, 0); - #else - return a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtss_si32 (simde__m128 a) { - return simde_mm_cvt_ss2si(a); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtss_si64(a); - #else - return _mm_cvtss_si64x(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); - #else - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) -# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtt_ss2si (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtt_ss2si(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - simde_float32 v = a_.f32[0]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, v); - #endif - #endif - #endif -} -#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) -# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) - #if defined(__PGI) - return _mm_cvttss_si64x(a); - #else - return _mm_cvttss_si64(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); - float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); - r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) - r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = a_.f32[0] / b_.f32[0]; - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_mm_extract_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private a_ = simde__m64_to_private(a); - return a_.i16[imm8]; -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) -#endif -#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) -# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private - a_ = simde__m64_to_private(a); - - a_.i16[imm8] = i; - - return simde__m64_from_private(a_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) -#endif -#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps(mem_addr); -#else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); - #endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load1_ps (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps1(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_dup_f32(mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); - #else - r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ss (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ss(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); - #else - r_.f32[0] = *mem_addr; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); - #else - simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -/* The SSE documentation says that there are no alignment requirements - for mem_addr. Unfortunately they used the __m64 type for the argument - which is supposed to be 8-byte aligned, so some compilers (like clang - with -Wcast-align) will generate a warning if you try to cast, say, - a simde_float32* to a simde__m64* for this function. - - I think the choice of argument type is unfortunate, but I do think we - need to stick to it here. 
If there is demand I can always add something - like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vld1_f32( - HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); - #else - simde__m64_private b_; - simde_memcpy(&b_, mem_addr, sizeof(b_)); - r_.i32[0] = b_.i32[0]; - r_.i32[1] = b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadr_ps(mem_addr); - #else - simde__m128_private - r_, - v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrev64q_f32(v_.neon_f32); - r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_reve(v_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); - #else - r_.f32[0] = v_.f32[3]; - r_.f32[1] = v_.f32[2]; - r_.f32[2] = v_.f32[1]; - r_.f32[3] = v_.f32[0]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadu_ps(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m64_private - a_ = simde__m64_to_private(a), - mask_ = simde__m64_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) - if (mask_.i8[i] < 0) - mem_addr[i] = a_.i8[i]; - #endif -} -#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) -# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) - r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); - #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) - r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) -# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) -# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - #if defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); - #else - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); - #endif - #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); - r_.f32 = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.f32), - ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | - (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) - ) - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) -# define _m_pminub(a, b) simde_mm_min_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movehl_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vzip2q_u64(b_.neon_u64, a_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a32 = vget_high_f32(a_.neon_f32); - float32x2_t b32 = vget_high_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(b32, a32); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergel(b_.altivec_i64, a_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); - #else - r_.f32[0] = b_.f32[2]; - r_.f32[1] = b_.f32[3]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movelh_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = 
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
[The bulk of the vendored SIMDe compatibility header is deleted here. The removed code supplied portable implementations of the x86 SSE/SSE2 intrinsics for non-x86 targets: reciprocal and square-root approximations (simde_mm_rcp_ps/ss, simde_mm_rsqrt_ps/ss, simde_mm_sqrt_ps/ss), set/store/stream/shuffle/unpack helpers and SIMDE_MM_TRANSPOSE4_PS, unordered scalar comparisons (simde_mm_ucomi*_ss), the simde__m128i/simde__m128d union types with NEON, AltiVec, WASM SIMD128, MIPS MSA, and LoongArch LSX views, and SSE2 arithmetic/logic (simde_mm_add_epi8/16/32/64, simde_mm_add_pd/sd, simde_mm_adds_*, simde_mm_and_pd, simde_mm_and_si128, simde_mm_andnot_*, simde_mm_xor_pd). Each intrinsic resolved at compile time to the native instruction when available, to a platform-specific SIMD mapping otherwise, and to a plain scalar loop as the last resort. The header is no longer referenced once the includes are dropped from src/sdpr_mcmc.cpp and the SIMDe flag is removed from src/Makevars.in.]
simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) - uint16_t wa SIMDE_VECTOR(32); - uint16_t wb SIMDE_VECTOR(32); - uint16_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) - uint32_t wa SIMDE_VECTOR(32); - uint32_t wb SIMDE_VECTOR(32); - uint32_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setzero_si128 (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_si128(); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vdupq_n_s32(0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT) - r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = 0; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_si128() (simde_mm_setzero_si128()) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_bslli_si128 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & ~15))) { - return simde_mm_setzero_si128(); - } - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) - r_.altivec_i8 = - #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - vec_slo - #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ - vec_sro - #endif - (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); - #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.u128[0] = a_.u128[0] << (imm8 * 8); - #else - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i - imm8]; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) - #define simde_mm_bslli_si128(a, imm8) \ - simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ - simde__m128i_from_wasm_v128( \ - wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ - simde__m128i_to_wasm_v128((a)), \ - ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_; \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.i8 = \ - SIMDE_SHUFFLE_VECTOR_(8, 16, \ - simde_tmp_z_.i8, \ - (simde_tmp_a_).i8, \ - HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#endif -#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) - #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_bsrli_si128 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & ~15))) { - return simde_mm_setzero_si128(); - } - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) - r_.altivec_i8 = - #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - vec_sro - #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ - vec_slo - #endif - (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int e = HEDLEY_STATIC_CAST(int, i) + imm8; - r_.i8[i] = (e < 16) ? a_.i8[e] : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) - #define simde_mm_bsrli_si128(a, imm8) \ - simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.wasm_v128 = \ - wasm_i8x16_shuffle( \ - simde_tmp_z_.wasm_v128, \ - simde_tmp_a_.wasm_v128, \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.i8 = \ - SIMDE_SHUFFLE_VECTOR_(8, 16, \ - simde_tmp_z_.i8, \ - (simde_tmp_a_).i8, \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#endif -#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) - #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_clflush (void const* p) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_clflush(p); - #else - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_clflush(p) simde_mm_clflush(p) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] == b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] >= b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] > b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] <= b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] < b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), 
- b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] != b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { - simde__m128d_private - r_, - dest_ = simde__m128d_to_private(dest), - src_ = simde__m128d_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); - #else - simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); - uint64_t u64_nz; - simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); - uint64x2_t sign_pos = vdupq_n_u64(u64_nz); - #endif - r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); - #else - r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); - #endif - #elif defined(simde_math_copysign) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); - } - #else - simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); - return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { - return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_castpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_ps(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f32_f64(a); - #else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castpd_si128 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_si128(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_s64_f64(a); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_castps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_pd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_f32(a); - #else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castps_pd(a) simde_mm_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castps_si128 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_si128(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif 
-} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castps_si128(a) simde_mm_castps_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_castsi128_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_pd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_s64(a); - #else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_castsi128_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_ps(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); - #else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = (a_.i16 == b_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_MIPS_MSA_NATIVE) - r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpgt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpge_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpge_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpngt_pd(a, b); - #else - return simde_mm_cmple_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpngt_sd(a, b); - #else - return simde_mm_cmple_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnge_pd(a, b); - #else - return simde_mm_cmplt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpnge_sd(a, b); - #else - return simde_mm_cmplt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_pd(a, b); - #else - return simde_mm_cmpge_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_sd(a, b); - #else - return simde_mm_cmpge_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnle_pd(a, b); - #else - return simde_mm_cmpgt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return 
_mm_cmpnle_sd(a, b); - #else - return simde_mm_cmpgt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vandq_u64(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm_cvtsd_f64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cvtsd_f64(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); - #else - return a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_pd(a); - #else - simde__m128d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtepi32_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_ps(a); - #else - simde__m128_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #pragma clang diagnostic ignored "-Wc11-extensions" - #endif - r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = simde_math_round(a_.f64[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) - return _mm_cvtpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvtpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) - float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; - r_.f32 = - __builtin_shufflevector( - __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, - 0, 1, 2, 3 - ); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); - r_.f32[2] = SIMDE_FLOAT32_C(0.0); - r_.f32[3] = SIMDE_FLOAT32_C(0.0); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtpi32_pd (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_pd(a); - #else - simde__m128d_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) - a_ = simde__m128_to_private(a); - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - #else - a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_pd(a); - #else - simde__m128d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 v = simde_math_round(a_.f64[0]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsd_si64x(a); - #else - return _mm_cvtsd_si64(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); - #endif -} -#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) - #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); - - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i]; - } - #endif - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_x_mm_cvtsi128_si16 (simde__m128i a) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s16(a_.neon_i16, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i16, 0); - #else - return a_.i16[0]; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi128_si32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi128_si32(a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s32(a_.neon_i32, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i32, 0); - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsi128_si64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsi128_si64x(a); - #else - return _mm_cvtsi128_si64(a); - #endif - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); - #endif - return a_.i64[0]; - #endif -} -#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) - #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_sd(a, b); - #else - simde__m128d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.i64[1] = a_.i64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cvtsi16_si128 (int16_t a) { - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); - #else - r_.i16[0] = a; - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - r_.i16[4] = 0; - r_.i16[5] = 0; - r_.i16[6] = 0; - r_.i16[7] = 0; - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi32_si128 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_si128(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_sd(a, b); - #else - return _mm_cvtsi64x_sd(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) - #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi64_si128 (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_si128(a); - #else - return _mm_cvtsi64x_si128(a); - #endif - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i64x2_make(a, 0); - #else - r_.i64[0] = a; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) - #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtss_sd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); - return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); - - return simde__m128d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvttpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvttpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = a_.f64[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvttpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - /* Values below INT32_MIN saturate anyways, so we don't need to - * test for that. 
*/ - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = - vandq_u32( - vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), - vceqq_f32(a_.neon_f32, a_.neon_f32) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); - #endif - - r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - v128_t valid_input = - wasm_v128_and( - wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), - wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); - #endif - - r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); - #endif - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; - - __typeof__(r_.i32) valid_input = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.i32), - (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) - ); - #elif !defined(SIMDE_FAST_NANS) - __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); - #endif - - __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; - r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); - #endif - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvttsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvttsd_si64(a); - #else - return _mm_cvttsd_si64x(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); - #endif -} -#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) - #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] / b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - uint16_t r; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); - #else - r = a_.u16[imm8 & 7]; - #endif - - return HEDLEY_STATIC_CAST(int32_t, r); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) - #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m128i_private a_ = simde__m128i_to_private(a); - a_.i16[imm8 & 7] = i; - return simde__m128i_from_private(a_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load1_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load1_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); - #else - return simde_mm_set1_pd(*mem_addr); - #endif -} -#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) - #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_sd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_sd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = UINT64_C(0); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_load_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadh_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); - #else - simde_float64 t; - - simde_memcpy(&t, mem_addr, sizeof(t)); - r_.f64[0] = a_.f64[0]; - r_.f64[1] = t; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_epi64(mem_addr); - #else - simde__m128i_private r_; - - int64_t value; - simde_memcpy(&value, mem_addr, sizeof(value)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); - #else - r_.i64[0] = value; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vld1_f64( - HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = a_.u64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadr_pd(mem_addr); - #else - simde__m128d_private - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); - r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t tmp = 
wasm_v128_load(mem_addr); - r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); - #else - r_.f64[0] = mem_addr[1]; - r_.f64[1] = mem_addr[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld1q_f64(mem_addr); - #else - simde__m128d_private r_; - - simde_memcpy(&r_, mem_addr, sizeof(r_)); - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi8 - #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi16 - #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi32(void const * mem_addr) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi32 - #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi64 - #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si128 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); - #else - simde__m128i_private r_; - - #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_PACKED_ - struct simde_mm_loadu_si128_s { - __typeof__(r_) v; - } __attribute__((__packed__, __may_alias__)); - r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_madd_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpaddq_s32(pl, ph); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); - int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); - int32x2_t rh = 
vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); - r_.neon_i32 = vcombine_s32(rl, rh); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) a32, b32, p32; - SIMDE_CONVERT_VECTOR_(a32, a_.i16); - SIMDE_CONVERT_VECTOR_(b32, b_.i16); - p32 = a32 * b32; - r_.i32 = - __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + - __builtin_shufflevector(p32, p32, 1, 3, 5, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - if (mask_.u8[i] & 0x80) { - mem_addr[i] = a_.i8[i]; - } - } - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) - /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ - return _mm_movemask_epi8(a); - #else - int32_t r = 0; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ - static const uint8_t md[16] = { - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - }; - - /* Extend sign bit over entire lane */ - uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); - /* Clear all but the bit we're interested in. 
*/ - uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); - /* Alternate bytes from low half and high half */ - uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); - uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vaddvq_u16(x); - #else - uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[15 - i] >> 7) << (15 - i); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_movemask_pd(a); - #else - int32_t r = 0; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + - (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 
-simde_mm_movepi64_pi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movepi64_pi64(a); - #else - simde__m64_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i64 = vget_low_s64(a_.neon_i64); - #else - r_.i64[0] = a_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_movpi64_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movpi64_epi64(a); - #else - simde__m128i_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_move_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_move_epi64(a) simde_mm_move_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t a_lo = vmovn_u64(a_.neon_u64); - uint32x2_t b_lo = vmovn_u64(b_.neon_u64); - r_.neon_u64 = vmull_u32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( - wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), - wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(a_.u32) z = { 0, }; - a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); - b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * - HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 * b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] * b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i64 = a_.i64 % b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_pd(a, b); - #else - simde__m128d_private - r_, 
- a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] * b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_mul_su32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); - #else - r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mulhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a3210 = vget_low_s16(a_.neon_i16); - int16x4_t b3210 = vget_low_s16(b_.neon_i16); - int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); - #else - int16x4_t a7654 = vget_high_s16(a_.neon_i16); - int16x4_t b7654 = vget_high_s16(b_.neon_i16); - int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); - r_.neon_u16 = rv.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); - 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_mulhi_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t a3210 = vget_low_u16(a_.neon_u16); - uint16x4_t b3210 = vget_low_u16(b_.neon_u16); - uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); - r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - #else - uint16x4_t a7654 = vget_high_u16(a_.neon_u16); - uint16x4_t b7654 = vget_high_u16(b_.neon_u16); - uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - r_.neon_u16 = neon_r.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); - r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mullo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_or_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - 
r_.i32f = a_.i32f | b_.i32f; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_or_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi16(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; - const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; - - int16_t m SIMDE_VECTOR(32); - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); - v = (v & ~m) | (min & m); - - m = v > max; - v = (v & ~m) | (max & m); - - SIMDE_CONVERT_VECTOR_(r_.i8, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; - r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi32(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; - const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; - - int32_t m SIMDE_VECTOR(32); - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); - v = (v & ~m) | (min & m); - - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); - v = (v & ~m) | (max & m); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packus_epi16(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); - #else - r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = - vcombine_u8( - vqmovun_s16(a_.neon_i16), - vqmovun_s16(b_.neon_i16) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - - v &= ~(v >> 15); - v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); - - SIMDE_CONVERT_VECTOR_(r_.i8, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; - r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_pause (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_pause(); - #elif defined(SIMDE_ARCH_X86) - __asm__ __volatile__("pause"); - #elif defined(SIMDE_ARCH_ARM_NEON) - #if defined(_MSC_VER) - __isb(_ARM64_BARRIER_SY); - #else - __asm__ __volatile__("isb\n"); - #endif - #elif defined(SIMDE_ARCH_POWER) - __asm__ __volatile__ ("or 27,27,27" ::: "memory"); - #elif defined(SIMDE_ARCH_WASM) - __asm__ __volatile__ ("nop"); - #elif defined(HEDLEY_GCC_VERSION) - #if defined(SIMDE_ARCH_RISCV) - __builtin_riscv_pause(); - #else - __asm__ __volatile__ ("nop" ::: "memory"); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_pause() (simde_mm_pause()) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sad_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); - r_.neon_u64 = vcombine_u64( - vpaddl_u32(vpaddl_u16(vget_low_u16(t))), - vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - uint16_t tmp = 0; - SIMDE_VECTORIZE_REDUCTION(+:tmp) - for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { - const size_t e = j + (i * 8); - tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); - } - r_.i64[i] = tmp; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_make( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { - e0, e1, e2, e3, - e4, e5, e6, e7, - e8, e9, e10, e11, - e12, e13, e14, e15}; - r_.neon_i8 = vld1q_s8(data); - #else - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i16 = vld1q_s16(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - r_.i16[4] = e4; - r_.i16[5] = e5; - r_.i16[6] = e6; - r_.i16[7] = e7; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si16 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ - HEDLEY_GCC_VERSION_CHECK(12,1,0)) - return _mm_loadu_si16(mem_addr); - #else - int16_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_x_mm_cvtsi16_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi32(e3, e2, e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; - r_.neon_i32 = vld1q_s32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); - #else - r_.i32[0] = e0; - r_.i32[1] = e1; - r_.i32[2] = e2; - r_.i32[3] = e3; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si32 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ - HEDLEY_GCC_VERSION_CHECK(12,1,0)) - return _mm_loadu_si32(mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m128i_private r_; - r_.neon_i32 = vsetq_lane_s32(* HEDLEY_REINTERPRET_CAST(const int32_t *, mem_addr), vdupq_n_s32(0), 0); - return simde__m128i_from_private(r_); - #else - int32_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_mm_cvtsi32_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_epi64(e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); - #else - r_.m64[0] = e0; - r_.m64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi64x (int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set_epi64x(e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; - r_.neon_i64 = vld1q_s64(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make(e0, e1); - #else - r_.i64[0] = e0; - r_.i64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si64 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - return _mm_loadu_si64(mem_addr); - #else - int64_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_mm_cvtsi64_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, - uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, - uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi8( - HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), - HEDLEY_STATIC_CAST(char, e11), 
HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), - HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), - HEDLEY_STATIC_CAST(char, e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { - e0, e1, e2, e3, - e4, e5, e6, e7, - e8, e9, e10, e11, - e12, e13, e14, e15}; - r_.neon_u8 = vld1q_u8(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); - #else - r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; - r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; - r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; - r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, - uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi16( - HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), - HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u16 = vld1q_u16(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); - #else - r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; - r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi32( - HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; - r_.neon_u32 = vld1q_u32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); - #else - r_.u32[0] = e0; - r_.u32[1] = e1; - r_.u32[2] = e2; - r_.u32[3] = e3; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; - r_.neon_u64 = vld1q_u64(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_make(e0, e1); - #else - r_.u64[0] = e0; - r_.u64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_sd (simde_float64 a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_sd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); - #else - return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_sd(a) simde_mm_set_sd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi8(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vdupq_n_s8(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi16 (int16_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi16(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vdupq_n_s16(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi32 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi32(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vdupq_n_s32(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi64x (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set1_epi64x(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vdupq_n_s64(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_epi64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - return simde_mm_set1_epi64x(a_.i64[0]); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu8 (uint8_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); - #else - return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu16 (uint16_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); - #else - return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu32 (uint32_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); - #else - return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu64 (uint64_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); - #else - return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_epi8( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - 
#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi32(e3, e2, e1, e0); - #else - return simde_mm_set_epi32(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_epi64(e1, e0); - #else - return simde_mm_set_epi64(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_pd(e1, e0); - #else - return simde_mm_set_pd(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setzero_pd (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_pd(); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); - #else - return simde_mm_castsi128_pd(simde_mm_setzero_si128()); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_pd() simde_mm_setzero_pd() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_undefined_pd (void) { - simde__m128d_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_pd(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_pd() simde_mm_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_undefined_si128 (void) { - simde__m128i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_si128(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_si128() (simde_mm_undefined_si128()) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_setone_pd (void) { - return simde_mm_castps_pd(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_setone_si128 (void) { - return simde_mm_castps_si128(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = 
simde__m128i_to_private(a); \ - simde__m128i_from_wasm_v128( \ - wasm_i32x4_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3)); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_epi32(a, imm8) \ - (__extension__ ({ \ - const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ - int32x4_t simde_mm_shuffle_epi32_r_; \ - simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ - vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - (simde_tmp_a_).i32, \ - (simde_tmp_a_).i32, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; - r_.f64[1] = ((imm8 & 2) == 0) ? 
b_.f64[0] : b_.f64[1]; - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ - simde__m128d_from_private((simde__m128d_private) { .f64 = \ - SIMDE_SHUFFLE_VECTOR_(64, 16, \ - simde__m128d_to_private(a).f64, \ - simde__m128d_to_private(b).f64, \ - (((imm8) ) & 1), \ - (((imm8) >> 1) & 1) + 2) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[i]; - } - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflehi_epi16(a, imm8) \ - (__extension__ ({ \ - int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ - simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ - wasm_i16x8_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = 
simde__m128i_to_private(a); - - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; - } - SIMDE_VECTORIZE - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) \ - simde__m128i_from_wasm_v128( \ - wasm_i16x8_shuffle( \ - simde__m128i_to_wasm_v128((a)), \ - wasm_i16x8_splat(0), \ - (((imm8) & 0x03) ), \ - (((imm8) & 0x0c) >> 2), \ - (((imm8) & 0x30) >> 4), \ - (((imm8) & 0xc0) >> 6), \ - 4, 5, 6, 7)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflelo_epi16(a, imm8) \ - (__extension__({ \ - int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ - simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), \ - 4, 5, 6, 7) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 15) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = (a_.u16 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? 
wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 31) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = (a_.u32 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 63) - return simde_mm_setzero_si128(); - - const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] << s; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsqrtq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_sqrt(a_.altivec_f64); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_sqrt) - r_.f64[0] = simde_math_sqrt(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~15) ? 15 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~31) ? 
31 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sra_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) - return _mm_sra_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - (((imm8) <= 0) ? 
\ - (a) : \ - simde__m128i_from_neon_i16( \ - ((imm8) > 15) ? \ - vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ - vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i32( \ - ((imm8) > 31) ? \ - vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ - vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sl(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 63))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i64( \ - ((imm8) > 63) ? \ - vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ - vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u16( \ - ((imm8) > 15) ? \ - vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ - vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u32( \ - ((imm8) > 31) ? \ - vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ - vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sr(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); - #else - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u64( \ - ((imm8) > 63) ? \ - vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ - vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store1_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); - #else - mem_addr[0] = a_.f64[0]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) - #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_sd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); - simde_memcpy(mem_addr, &v, sizeof(v)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); - simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde_float64 v = a_.f64[0]; - simde_memcpy(mem_addr, &v, sizeof(simde_float64)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void - simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeh_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - *mem_addr = a_.f64[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int64_t tmp; - - /* memcpy to prevent aliasing, tmp because we can't take the - * address of a vector element. */ - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - tmp = vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - tmp = vec_extract(a_.altivec_i64, 0); - #else - tmp = a_.i64[0]; - #endif - - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_pd(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 tmp; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - tmp = vgetq_lane_f64(a_.neon_f64, 0); - #else - tmp = a_.f64[0]; - #endif - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storer_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #else - mem_addr[0] = a_.f64[1]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si16(mem_addr, a); - #else - int16_t val = simde_x_mm_cvtsi128_si16(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si32(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); - #else - int32_t val = simde_mm_cvtsi128_si32(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si64(mem_addr, a); - #else - int64_t val = simde_mm_cvtsi128_si64(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_pd(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_si128(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-void -simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_si32(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_s32(mem_addr, vdupq_n_s32(a), 0); - #else - *mem_addr = a; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) - _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s64(mem_addr, vdup_n_s64(a)); - #else - *mem_addr = a; - #endif -} -#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) - #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] - b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m64 -simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] - b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] == b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] == b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] >= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] >= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > 
wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] > b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] > b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] <= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] <= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] < b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] < b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] != b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] != b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_lfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_lfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_lfence() simde_mm_lfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_mfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_mfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mfence() simde_mm_mfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_high_s16(a_.neon_i16); - int16x4_t b1 = vget_high_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi16(a, b) 
simde_mm_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_high_s32(a_.neon_i32); - int32x2_t b1 = vget_high_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_h = vget_high_s64(a_.neon_i64); - int64x1_t b_h = vget_high_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_h, b_h); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi8 (simde__m128i a, 
simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i]; - r_.i8[(i * 2) + 1] = b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_low_s16(a_.neon_i16); - int16x4_t b1 = vget_low_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i]; - r_.i16[(i * 2) + 1] = b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_low_s32(a_.neon_i32); - int32x2_t b1 = vget_low_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i]; - r_.i32[(i * 2) + 1] = b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_l = vget_low_s64(a_.neon_i64); - int64x1_t b_l = vget_low_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_l, b_l); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i]; - r_.i64[(i * 2) + 1] = b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i]; - r_.f64[(i * 2) + 1] = b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_negate_pd(simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - r_.altivec_f64 = vec_neg(a_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vnegq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); - #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_not_si128 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_ternarylogic_epi32(a, a, a, 0x55); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/sse2.h :: */ - -/* - * Multiplication in GF(2^8) - * http://en.wikipedia.org/wiki/Finite_field_arithmetic - * Irreducible polynomial m(x) = x8 + x4 + x3 + x + 1 - * - * NOTE: This function can be easily replaced with a look up table for a speed - * boost, at the expense of an increase in memory size. - -SIMDE_FUNCTION_ATTRIBUTES -uint8_t gmult(uint8_t a, uint8_t b) { - uint8_t p = 0, i = 0, hbs = 0; - - for (i = 0; i < 8; i++) { - if (b & 1) { - p ^= a; - } - - hbs = a & 0x80; - a <<= 1; - if (hbs) a ^= 0x1b; // 0000 0001 0001 1011 - b >>= 1; - } - - return (uint8_t)p; -} - */ - -#if !(defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)) - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-aes.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_AES_H) -#define SIMDE_AES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if !(defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)) - -/* - * Number of columns (32-bit words) comprising the State. For this - * standard, Nb = 4. - */ -#define simde_x_aes_Nb 4 - -static uint8_t simde_x_aes_gmult_lookup_table[8][256] = { -{ // gmult(0x02, b); - 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, - 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, - 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, - 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, - 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, - 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, - 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, - 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, - 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, - 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25, - 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, - 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, - 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, - 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, - 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, - 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 -}, -{ // gmult(0x01, b); - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - 
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, -}, -{ // gmult(0x01, b); - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, -}, -{ // gmult(0x03, b); - 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, - 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, - 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, - 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, - 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, - 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, - 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, - 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, - 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a, - 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba, - 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, - 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, - 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, - 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, - 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, - 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a, -}, -{ // gmult(0x0e, b); - 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, - 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 
0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, - 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, - 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, - 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, - 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, - 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, - 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, - 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, - 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb, - 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, - 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, - 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, - 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, - 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, - 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d, -}, -{ // gmult(0x09, b); - 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, - 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, - 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, - 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, - 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, - 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, - 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, - 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, - 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, - 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b, - 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, - 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, - 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, - 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, - 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, - 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46, - -}, -{ // gmult(0x0d, b); - 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, - 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, - 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, - 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, - 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, - 0xbd, 0xb0, 
0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, - 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, - 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, - 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, - 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, - 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, - 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, - 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, - 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, - 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, - 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97, -}, -{ // gmult(0x0b, b); - 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, - 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, - 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, - 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, - 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, - 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, - 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, - 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, - 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, - 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e, - 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, - 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, - 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, - 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, - 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, - 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3, -} -}; - -/* - * S-box transformation table - */ -static uint8_t simde_x_aes_s_box[256] = { - // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, // 0 - 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, // 1 - 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, // 2 - 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, // 3 - 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, // 4 - 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, // 5 - 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, // 6 - 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 
0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, // 7 - 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, // 8 - 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, // 9 - 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, // a - 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, // b - 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, // c - 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, // d - 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, // e - 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};// f - -/* - * Inverse S-box transformation table - */ -static uint8_t simde_x_aes_inv_s_box[256] = { - // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, // 0 - 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, // 1 - 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, // 2 - 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, // 3 - 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, // 4 - 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, // 5 - 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, // 6 - 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, // 7 - 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, // 8 - 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, // 9 - 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, // a - 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, // b - 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, // c - 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, // d - 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, // e - 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d};// f - -/* - * Multiplication of 4 byte words - * m(x) = x4+1 - -SIMDE_FUNCTION_ATTRIBUTES -void coef_mult(uint8_t *a, uint8_t *b, uint8_t *d) { - - d[0] = gmult(a[0],b[0])^gmult(a[3],b[1])^gmult(a[2],b[2])^gmult(a[1],b[3]); - d[1] = gmult(a[1],b[0])^gmult(a[0],b[1])^gmult(a[3],b[2])^gmult(a[2],b[3]); - d[2] = gmult(a[2],b[0])^gmult(a[1],b[1])^gmult(a[0],b[2])^gmult(a[3],b[3]); - d[3] = gmult(a[3],b[0])^gmult(a[2],b[1])^gmult(a[1],b[2])^gmult(a[0],b[3]); -} -*/ - -SIMDE_FUNCTION_ATTRIBUTES -void simde_x_aes_coef_mult_lookup(int lookup_table_offset, uint8_t *b, uint8_t *d) { - int o = lookup_table_offset; - - #define gmultl(o,b) simde_x_aes_gmult_lookup_table[o][b] - d[0] = gmultl(o+0,b[0])^gmultl(o+3,b[1])^gmultl(o+2,b[2])^gmultl(o+1,b[3]); - d[1] = gmultl(o+1,b[0])^gmultl(o+0,b[1])^gmultl(o+3,b[2])^gmultl(o+2,b[3]); - d[2] = 
gmultl(o+2,b[0])^gmultl(o+1,b[1])^gmultl(o+0,b[2])^gmultl(o+3,b[3]); - d[3] = gmultl(o+3,b[0])^gmultl(o+2,b[1])^gmultl(o+1,b[2])^gmultl(o+0,b[3]); - #undef gmultl -} - -#endif - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_AES_H) */ -/* :: End simde/simde-aes.h :: */ - -/* - * Transformation in the Cipher and Inverse Cipher in which a Round - * Key is added to the State using an XOR operation. The length of a - * Round Key equals the size of the State (i.e., for Nb = 4, the Round - * Key length equals 128 bits/16 bytes). - */ -SIMDE_FUNCTION_ATTRIBUTES -void simde_x_aes_add_round_key(uint8_t *state, simde__m128i_private w, uint8_t r) { - - int Nb = simde_x_aes_Nb; - uint8_t c; - - for (c = 0; c < Nb; c++) { - state[Nb*0+c] = state[Nb*0+c]^w.u8[4*Nb*r+4*c+0]; - state[Nb*1+c] = state[Nb*1+c]^w.u8[4*Nb*r+4*c+1]; - state[Nb*2+c] = state[Nb*2+c]^w.u8[4*Nb*r+4*c+2]; - state[Nb*3+c] = state[Nb*3+c]^w.u8[4*Nb*r+4*c+3]; - } -} - -/* - * Transformation in the Cipher that takes all of the columns of the - * State and mixes their data (independently of one another) to - * produce new columns. - */ -SIMDE_FUNCTION_ATTRIBUTES -void simde_x_aes_mix_columns(uint8_t *state) { - - int Nb = simde_x_aes_Nb; - // uint8_t k[] = {0x02, 0x01, 0x01, 0x03}; // a(x) = {02} + {01}x + {01}x2 + {03}x3 - uint8_t i, j, col[4], res[4]; - - for (j = 0; j < Nb; j++) { - for (i = 0; i < 4; i++) { - col[i] = state[Nb*i+j]; - } - - //coef_mult(k, col, res); - simde_x_aes_coef_mult_lookup(0, col, res); - - for (i = 0; i < 4; i++) { - state[Nb*i+j] = res[i]; - } - } -} - -/* - * Transformation in the Inverse Cipher that is the inverse of - * MixColumns(). - */ -SIMDE_FUNCTION_ATTRIBUTES -void simde_x_aes_inv_mix_columns(uint8_t *state) { - - int Nb = simde_x_aes_Nb; - // uint8_t k[] = {0x0e, 0x09, 0x0d, 0x0b}; // a(x) = {0e} + {09}x + {0d}x2 + {0b}x3 - uint8_t i, j, col[4], res[4]; - - for (j = 0; j < Nb; j++) { - for (i = 0; i < 4; i++) { - col[i] = state[Nb*i+j]; - } - - //coef_mult(k, col, res); - simde_x_aes_coef_mult_lookup(4, col, res); - - for (i = 0; i < 4; i++) { - state[Nb*i+j] = res[i]; - } - } -} - -/* - * Transformation in the Cipher that processes the State by cyclically - * shifting the last three rows of the State by different offsets. - */ -SIMDE_FUNCTION_ATTRIBUTES -void simde_x_aes_shift_rows(uint8_t *state) { - - int Nb = simde_x_aes_Nb; - uint8_t i, k, s, tmp; - - for (i = 1; i < 4; i++) { - // shift(1,4)=1; shift(2,4)=2; shift(3,4)=3 - // shift(r, 4) = r; - s = 0; - while (s < i) { - tmp = state[Nb*i+0]; - - for (k = 1; k < Nb; k++) { - state[Nb*i+k-1] = state[Nb*i+k]; - } - - state[Nb*i+Nb-1] = tmp; - s++; - } - } -} - -/* - * Transformation in the Inverse Cipher that is the inverse of - * ShiftRows(). - */ -SIMDE_FUNCTION_ATTRIBUTES -void simde_x_aes_inv_shift_rows(uint8_t *state) { - - uint8_t Nb = simde_x_aes_Nb; - uint8_t i, k, s, tmp; - - for (i = 1; i < 4; i++) { - s = 0; - while (s < i) { - tmp = state[Nb*i+Nb-1]; - - for (k = Nb-1; k > 0; k--) { - state[Nb*i+k] = state[Nb*i+k-1]; - } - - state[Nb*i+0] = tmp; - s++; - } - } -} - -/* - * Transformation in the Cipher that processes the State using a non - * linear byte substitution table (S-box) that operates on each of the - * State bytes independently. 
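For orientation, each row of the simde_x_aes_gmult_lookup_table being removed here is a precomputed GF(2^8) product against one fixed MixColumns/InvMixColumns coefficient (02, 01, 03 and 0e, 09, 0d, 0b), and simde_x_aes_coef_mult_lookup combines four such lookups per output byte. A minimal scalar sketch of the multiplication those tables tabulate, assuming the standard AES reduction polynomial x^8 + x^4 + x^3 + x + 1 (0x11b); the name gf256_mult is hypothetical and not part of the removed file or of this patch:

#include <assert.h>
#include <stdint.h>

/* Illustrative sketch only: shift-and-add multiply in GF(2^8), reducing by the
 * AES polynomial 0x11b. The deleted lookup tables store this product for each
 * fixed coefficient c and every byte value b. */
static uint8_t gf256_mult(uint8_t a, uint8_t b) {
    uint8_t p = 0;
    for (int i = 0; i < 8; i++) {
        if (b & 1u) p ^= a;                    /* add (XOR) a when the low bit of b is set */
        uint8_t carry = (uint8_t)(a & 0x80u);
        a = (uint8_t)(a << 1);
        if (carry) a ^= 0x1bu;                 /* reduce x^8 modulo x^8 + x^4 + x^3 + x + 1 */
        b >>= 1;
    }
    return p;
}

int main(void) {
    /* Matches the first table row above: gmult(0x02, 0x80) == 0x1b. */
    assert(gf256_mult(0x02, 0x80) == 0x1b);
    /* One MixColumns output byte with coefficients {02, 03, 01, 01}, as in the
     * commented-out coef_mult: standard test column db 13 53 45 -> 8e .. */
    uint8_t b0 = 0xdb, b1 = 0x13, b2 = 0x53, b3 = 0x45;
    uint8_t d0 = gf256_mult(0x02, b0) ^ gf256_mult(0x03, b1) ^ b2 ^ b3;
    assert(d0 == 0x8e);
    return 0;
}

The table-driven variant in simde_x_aes_coef_mult_lookup replaces each gf256_mult call with a single table read, which is why eight 256-entry rows are enough for both MixColumns (offset 0) and InvMixColumns (offset 4).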
- */ -SIMDE_FUNCTION_ATTRIBUTES -void simde_x_aes_sub_bytes(uint8_t *state) { - - int Nb = simde_x_aes_Nb; - uint8_t i, j; - - for (i = 0; i < 4; i++) { - for (j = 0; j < Nb; j++) { - // s_box row: yyyy ---- - // s_box col: ---- xxxx - // s_box[16*(yyyy) + xxxx] == s_box[yyyyxxxx] - state[Nb*i+j] = simde_x_aes_s_box[state[Nb*i+j]]; - } - } -} - -/* - * Transformation in the Inverse Cipher that is the inverse of - * SubBytes(). - */ -SIMDE_FUNCTION_ATTRIBUTES -void simde_x_aes_inv_sub_bytes(uint8_t *state) { - - int Nb = simde_x_aes_Nb; - uint8_t i, j; - - for (i = 0; i < 4; i++) { - for (j = 0; j < Nb; j++) { - state[Nb*i+j] = simde_x_aes_inv_s_box[state[Nb*i+j]]; - } - } -} - -/* - * Performs the AES cipher operation - */ -SIMDE_FUNCTION_ATTRIBUTES -void simde_x_aes_enc(simde__m128i_private in, simde__m128i_private *out, simde__m128i_private w, int is_last) { - - int Nb = simde_x_aes_Nb; - uint8_t state[4*simde_x_aes_Nb]; - uint8_t r = 0, i, j; - - for (i = 0; i < 4; i++) { - for (j = 0; j < Nb; j++) { - state[Nb*i+j] = in.u8[i+4*j]; - } - } - - simde_x_aes_sub_bytes(state); - simde_x_aes_shift_rows(state); - - if (!is_last) - simde_x_aes_mix_columns(state); - - simde_x_aes_add_round_key(state, w, r); - - for (i = 0; i < 4; i++) { - for (j = 0; j < Nb; j++) { - out->u8[i+4*j] = state[Nb*i+j]; - } - } -} - -/* - * Performs the AES inverse cipher operation - */ -SIMDE_FUNCTION_ATTRIBUTES -void simde_x_aes_dec(simde__m128i_private in, simde__m128i_private *out, simde__m128i_private w, int is_last) { - - int Nb = simde_x_aes_Nb; - uint8_t state[4*simde_x_aes_Nb]; - uint8_t r = 0, i, j; - - for (i = 0; i < 4; i++) { - for (j = 0; j < Nb; j++) { - state[Nb*i+j] = in.u8[i+4*j]; - } - } - - simde_x_aes_inv_shift_rows(state); - simde_x_aes_inv_sub_bytes(state); - - if (!is_last) - simde_x_aes_inv_mix_columns(state); - - simde_x_aes_add_round_key(state, w, r); - - for (i = 0; i < 4; i++) { - for (j = 0; j < Nb; j++) { - out->u8[i+4*j] = state[Nb*i+j]; - } - } -} -#endif // if !(defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)) - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_aesenc_si128(simde__m128i a, simde__m128i round_key) { - #if defined(SIMDE_X86_AES_NATIVE) - return _mm_aesenc_si128(a, round_key); - #else - simde__m128i_private result_; - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__m128i_private round_key_ = simde__m128i_to_private(round_key); - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) - result_.neon_u8 = veorq_u8( - vaesmcq_u8(vaeseq_u8(a_.neon_u8, vdupq_n_u8(0))), - round_key_.neon_u8); - #else - simde_x_aes_enc(a_, &result_, round_key_, 0); - #endif - return simde__m128i_from_private(result_); - #endif -} -#if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES) - #define _mm_aesenc_si128(a, b) simde_mm_aesenc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_aesdec_si128(simde__m128i a, simde__m128i round_key) { - #if defined(SIMDE_X86_AES_NATIVE) - return _mm_aesdec_si128(a, round_key); - #else - simde__m128i_private result_; - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__m128i_private round_key_ = simde__m128i_to_private(round_key); - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) - result_.neon_u8 = veorq_u8( - vaesimcq_u8(vaesdq_u8(a_.neon_u8, vdupq_n_u8(0))), - round_key_.neon_u8); - #else - simde_x_aes_dec(a_, &result_, round_key_, 0); - #endif - return simde__m128i_from_private(result_); - #endif -} -#if 
defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES) - #define _mm_aesdec_si128(a, b) simde_mm_aesdec_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_aesenclast_si128(simde__m128i a, simde__m128i round_key) { - #if defined(SIMDE_X86_AES_NATIVE) - return _mm_aesenclast_si128(a, round_key); - #else - simde__m128i_private result_; - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__m128i_private round_key_ = simde__m128i_to_private(round_key); - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) - result_.neon_u8 = vaeseq_u8(a_.neon_u8, vdupq_n_u8(0)); - result_.neon_i32 = veorq_s32(result_.neon_i32, round_key_.neon_i32); // _mm_xor_si128 - #else - simde_x_aes_enc(a_, &result_, round_key_, 1); - #endif - return simde__m128i_from_private(result_); - #endif -} -#if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES) - #define _mm_aesenclast_si128(a, b) simde_mm_aesenclast_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_aesdeclast_si128(simde__m128i a, simde__m128i round_key) { - #if defined(SIMDE_X86_AES_NATIVE) - return _mm_aesdeclast_si128(a, round_key); - #else - simde__m128i_private result_; - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__m128i_private round_key_ = simde__m128i_to_private(round_key); - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) - result_.neon_u8 = veorq_u8( - vaesdq_u8(a_.neon_u8, vdupq_n_u8(0)), - round_key_.neon_u8); - #else - simde_x_aes_dec(a_, &result_, round_key_, 1); - #endif - return simde__m128i_from_private(result_); - #endif -} -#if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES) - #define _mm_aesdeclast_si128(a, b) simde_mm_aesdeclast_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_aesimc_si128(simde__m128i a) { - #if defined(SIMDE_X86_AES_NATIVE) - return _mm_aesimc_si128(a); - #else - simde__m128i_private result_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) - result_.neon_u8 = vaesimcq_u8(a_.neon_u8); - #else - int Nb = simde_x_aes_Nb; - // uint8_t k[] = {0x0e, 0x09, 0x0d, 0x0b}; // a(x) = {0e} + {09}x + {0d}x2 + {0b}x3 - uint8_t i, j, col[4], res[4]; - - for (j = 0; j < Nb; j++) { - for (i = 0; i < 4; i++) { - col[i] = a_.u8[Nb*j+i]; - } - - //coef_mult(k, col, res); - simde_x_aes_coef_mult_lookup(4, col, res); - - for (i = 0; i < 4; i++) { - result_.u8[Nb*j+i] = res[i]; - } - } - #endif - return simde__m128i_from_private(result_); - #endif -} -#if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES) - #define _mm_aesimc_si128(a) simde_mm_aesimc_si128(a) -#endif - -#undef simde_x_aes_Nb - -#endif /* !defined(SIMDE_X86_AES_H) */ -/* :: End simde/x86/aes.h :: */ diff --git a/src/simde/x86/avx.h b/src/simde/x86/avx.h deleted file mode 100644 index b1ba340ed..000000000 --- a/src/simde/x86/avx.h +++ /dev/null @@ -1,34420 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * 
furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2018-2020 Evan Nemerson - * 2020 Michael R. Crusoe - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - */ - -#if !defined(SIMDE_X86_SSE_H) -#define SIMDE_X86_SSE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/mmx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_MMX_H) -#define SIMDE_X86_MMX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-common.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_COMMON_H) -#define SIMDE_COMMON_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/hedley.h :: */ -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . 
- * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) -#if defined(HEDLEY_VERSION) -# undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 16 - -#if defined(HEDLEY_STRINGIFY_EX) -# undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -# undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -# undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(HEDLEY_CONCAT) -# undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) - -#if defined(HEDLEY_CONCAT3_EX) -# undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(HEDLEY_CONCAT3) -# undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(HEDLEY_VERSION_ENCODE) -# undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -# undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -# undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -# undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -# undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -# undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -# undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -# undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(HEDLEY_INTEL_VERSION) -# undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -# undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -# undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) -# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -# undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -# undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -# undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -# undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -# undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_ARM_VERSION) -# undef HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(HEDLEY_ARM_VERSION_CHECK) -# undef HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(HEDLEY_ARM_VERSION) -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IBM_VERSION) -# undef HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(HEDLEY_IBM_VERSION_CHECK) -# undef HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(HEDLEY_IBM_VERSION) -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_VERSION) -# undef HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -# if (__TI_COMPILER_VERSION__ >= 16000000) -# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -# endif -#endif - -#if defined(HEDLEY_TI_VERSION_CHECK) -# undef HEDLEY_TI_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_VERSION) -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION) -# undef HEDLEY_TI_CL2000_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) -# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) -# undef HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL2000_VERSION) -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION) -# undef HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) -# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
-# undef HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL430_VERSION) -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION) -# undef HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) -# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) -# undef HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_ARMCL_VERSION) -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION) -# undef HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) -# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) -# undef HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL6X_VERSION) -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION) -# undef HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) -# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) -# undef HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL7X_VERSION) -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION) -# undef HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) -# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) -# undef HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CLPRU_VERSION) -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_CRAY_VERSION) -# undef HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) -# if defined(_RELEASE_PATCHLEVEL) -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) -# else -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) -# endif -#endif - -#if defined(HEDLEY_CRAY_VERSION_CHECK) -# undef HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(HEDLEY_CRAY_VERSION) -# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IAR_VERSION) -# undef HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) -# if __VER__ > 1000 -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) -# else -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) -# endif -#endif - -#if defined(HEDLEY_IAR_VERSION_CHECK) -# undef HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(HEDLEY_IAR_VERSION) -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TINYC_VERSION) -# undef HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) -# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) -#endif - -#if defined(HEDLEY_TINYC_VERSION_CHECK) -# undef HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_DMC_VERSION) -# undef HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) -# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if defined(HEDLEY_DMC_VERSION_CHECK) -# undef HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(HEDLEY_DMC_VERSION) -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION) -# undef HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) -# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION_CHECK) -# undef HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(HEDLEY_COMPCERT_VERSION) -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PELLES_VERSION) -# undef HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) -# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if defined(HEDLEY_PELLES_VERSION_CHECK) -# undef HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(HEDLEY_PELLES_VERSION) -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION) -# undef HEDLEY_MCST_LCC_VERSION -#endif -#if defined(__LCC__) && defined(__LCC_MINOR__) -# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) -# undef HEDLEY_MCST_LCC_VERSION_CHECK -#endif -#if defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_GCC_VERSION) -# undef HEDLEY_GCC_VERSION 
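All of the compiler-specific HEDLEY_*_VERSION and *_VERSION_CHECK macros in this removed header rely on the packing defined above: HEDLEY_VERSION_ENCODE folds major/minor/revision into a single integer (major*1000000 + minor*1000 + revision), so "at least version X.Y.Z" is a plain integer comparison and the DECODE macros invert it with division and modulo. A small self-contained sketch of that arithmetic, using a hypothetical VERSION_ENCODE name rather than the removed macros:

#include <assert.h>

/* Illustrative sketch of the packing scheme used by the removed hedley.h. */
#define VERSION_ENCODE(major, minor, revision) \
    (((major) * 1000000) + ((minor) * 1000) + (revision))

int main(void) {
    assert(VERSION_ENCODE(4, 6, 0) == 4006000);                  /* e.g. GCC 4.6.0 */
    assert(VERSION_ENCODE(4, 6, 0) >= VERSION_ENCODE(4, 3, 0));  /* ">= 4.3.0" check passes */
    assert(VERSION_ENCODE(4, 6, 0) <  VERSION_ENCODE(5, 0, 0));  /* but not ">= 5.0.0" */
    /* DECODE direction: major = v/1000000, minor = (v%1000000)/1000 */
    assert(4006000 / 1000000 == 4 && (4006000 % 1000000) / 1000 == 6);
    return 0;
}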
-#endif -#if \ - defined(HEDLEY_GNUC_VERSION) && \ - !defined(__clang__) && \ - !defined(HEDLEY_INTEL_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_ARM_VERSION) && \ - !defined(HEDLEY_CRAY_VERSION) && \ - !defined(HEDLEY_TI_VERSION) && \ - !defined(HEDLEY_TI_ARMCL_VERSION) && \ - !defined(HEDLEY_TI_CL430_VERSION) && \ - !defined(HEDLEY_TI_CL2000_VERSION) && \ - !defined(HEDLEY_TI_CL6X_VERSION) && \ - !defined(HEDLEY_TI_CL7X_VERSION) && \ - !defined(HEDLEY_TI_CLPRU_VERSION) && \ - !defined(__COMPCERT__) && \ - !defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION -#endif - -#if defined(HEDLEY_GCC_VERSION_CHECK) -# undef HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(HEDLEY_GCC_VERSION) -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_HAS_ATTRIBUTE) -# undef HEDLEY_HAS_ATTRIBUTE -#endif -#if \ - defined(__has_attribute) && \ - ( \ - (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ - ) -# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if \ - defined(__has_cpp_attribute) && \ - defined(__cplusplus) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) -# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#elif \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_BUILTIN) -# undef HEDLEY_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) -#else -# define HEDLEY_HAS_BUILTIN(builtin) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_BUILTIN) -# undef HEDLEY_GNUC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_BUILTIN) -# undef HEDLEY_GCC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_FEATURE) -# undef HEDLEY_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) -#else -# define HEDLEY_HAS_FEATURE(feature) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_FEATURE) -# undef HEDLEY_GNUC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_FEATURE) -# undef HEDLEY_GCC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_EXTENSION) -# undef HEDLEY_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) -#else -# define HEDLEY_HAS_EXTENSION(extension) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_EXTENSION) -# undef HEDLEY_GNUC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_EXTENSION) -# undef HEDLEY_GCC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) -#else -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_WARNING) -# undef HEDLEY_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) -#else -# define HEDLEY_HAS_WARNING(warning) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_WARNING) -# undef HEDLEY_GNUC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_WARNING) -# undef HEDLEY_GCC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - defined(__clang__) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ - HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ - (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) -# define HEDLEY_PRAGMA(value) _Pragma(#value) -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_PRAGMA(value) __pragma(value) -#else -# define HEDLEY_PRAGMA(value) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_PUSH) -# undef HEDLEY_DIAGNOSTIC_PUSH -#endif -#if defined(HEDLEY_DIAGNOSTIC_POP) -# undef HEDLEY_DIAGNOSTIC_POP -#endif -#if defined(__clang__) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) -# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) -#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#else -# 
define HEDLEY_DIAGNOSTIC_PUSH -# define HEDLEY_DIAGNOSTIC_POP -#endif - -/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wc++98-compat") -# if HEDLEY_HAS_WARNING("-Wc++17-extensions") -# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# endif -#endif -#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x -#endif - -#if defined(HEDLEY_CONST_CAST) -# undef HEDLEY_CONST_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) -#elif \ - HEDLEY_HAS_WARNING("-Wcast-qual") || \ - HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_REINTERPRET_CAST) -# undef HEDLEY_REINTERPRET_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) -#else -# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_STATIC_CAST) -# undef HEDLEY_STATIC_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) -#else -# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_CPP_CAST) -# undef HEDLEY_CPP_CAST -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wold-style-cast") -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ - ((T) (expr)) \ - HEDLEY_DIAGNOSTIC_POP -# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("diag_suppress=Pe137") \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) -# endif -#else -# define HEDLEY_CPP_CAST(T, expr) (expr) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) -# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif -#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-attributes") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") -#elif \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif -#if HEDLEY_HAS_WARNING("-Wcast-qual") -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif -#if HEDLEY_HAS_WARNING("-Wunused-function") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif - -#if defined(HEDLEY_DEPRECATED) -# undef HEDLEY_DEPRECATED -#endif -#if defined(HEDLEY_DEPRECATED_FOR) -# undef HEDLEY_DEPRECATED_FOR -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) -# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) -#elif \ - (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) -#elif defined(__cplusplus) && (__cplusplus >= 201402L) -# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) -# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(deprecated) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") -# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") -#else -# define HEDLEY_DEPRECATED(since) -# define HEDLEY_DEPRECATED_FOR(since, replacement) -#endif - -#if defined(HEDLEY_UNAVAILABLE) -# undef HEDLEY_UNAVAILABLE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warning) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) -#else -# define HEDLEY_UNAVAILABLE(available_since) -#endif - -#if defined(HEDLEY_WARN_UNUSED_RESULT) -# undef HEDLEY_WARN_UNUSED_RESULT -#endif -#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) -# undef HEDLEY_WARN_UNUSED_RESULT_MSG -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) -#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#elif defined(_Check_return_) /* SAL */ -# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ -#else -# define HEDLEY_WARN_UNUSED_RESULT -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) -#endif - -#if defined(HEDLEY_SENTINEL) -# undef HEDLEY_SENTINEL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(sentinel) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) -#else -# define HEDLEY_SENTINEL(position) -#endif - -#if defined(HEDLEY_NO_RETURN) -# undef HEDLEY_NO_RETURN -#endif -#if HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NO_RETURN __noreturn -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -# define HEDLEY_NO_RETURN _Noreturn -#elif defined(__cplusplus) && (__cplusplus >= 201103L) -# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(noreturn) || \ - HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_NO_RETURN _Pragma("does_not_return") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NO_RETURN __attribute((noreturn)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#else -# define HEDLEY_NO_RETURN -#endif - -#if defined(HEDLEY_NO_ESCAPE) -# undef HEDLEY_NO_ESCAPE -#endif -#if HEDLEY_HAS_ATTRIBUTE(noescape) -# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) -#else -# define HEDLEY_NO_ESCAPE -#endif - -#if defined(HEDLEY_UNREACHABLE) -# undef HEDLEY_UNREACHABLE -#endif -#if defined(HEDLEY_UNREACHABLE_RETURN) -# undef HEDLEY_UNREACHABLE_RETURN -#endif -#if defined(HEDLEY_ASSUME) -# undef HEDLEY_ASSUME -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ASSUME(expr) __assume(expr) -#elif HEDLEY_HAS_BUILTIN(__builtin_assume) -# define HEDLEY_ASSUME(expr) __builtin_assume(expr) -#elif \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# if defined(__cplusplus) -# define HEDLEY_ASSUME(expr) std::_nassert(expr) -# else -# define HEDLEY_ASSUME(expr) _nassert(expr) -# endif -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNREACHABLE() __builtin_unreachable() -#elif defined(HEDLEY_ASSUME) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif -#if !defined(HEDLEY_ASSUME) -# if defined(HEDLEY_UNREACHABLE) -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) -# else -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) -# endif -#endif -#if defined(HEDLEY_UNREACHABLE) -# if \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) -# else -# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() -# endif -#else -# define HEDLEY_UNREACHABLE_RETURN(value) return (value) -#endif -#if !defined(HEDLEY_UNREACHABLE) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wpedantic") -# pragma clang diagnostic ignored "-Wpedantic" -#endif -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif -#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) -# if defined(__clang__) -# pragma clang diagnostic ignored "-Wvariadic-macros" -# elif defined(HEDLEY_GCC_VERSION) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif -#endif -#if defined(HEDLEY_NON_NULL) -# undef HEDLEY_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) -#else -# define HEDLEY_NON_NULL(...) 
-#endif -HEDLEY_DIAGNOSTIC_POP - -#if defined(HEDLEY_PRINTF_FORMAT) -# undef HEDLEY_PRINTF_FORMAT -#endif -#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) -#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) -#elif \ - HEDLEY_HAS_ATTRIBUTE(format) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) -#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) -#else -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) -#endif - -#if defined(HEDLEY_CONSTEXPR) -# undef HEDLEY_CONSTEXPR -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) -# endif -#endif -#if !defined(HEDLEY_CONSTEXPR) -# define HEDLEY_CONSTEXPR -#endif - -#if defined(HEDLEY_PREDICT) -# undef HEDLEY_PREDICT -#endif -#if defined(HEDLEY_LIKELY) -# undef HEDLEY_LIKELY -#endif -#if defined(HEDLEY_UNLIKELY) -# undef HEDLEY_UNLIKELY -#endif -#if defined(HEDLEY_UNPREDICTABLE) -# undef HEDLEY_UNPREDICTABLE -#endif -#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) -# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) -# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) -# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) -# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) -# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) -#elif \ - (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, expected, probability) \ - (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) -# define HEDLEY_PREDICT_TRUE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ - })) -# define HEDLEY_PREDICT_FALSE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ - })) -# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) -# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) -#else -# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) -# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) -# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) -# define HEDLEY_LIKELY(expr) (!!(expr)) -# define HEDLEY_UNLIKELY(expr) (!!(expr)) -#endif -#if !defined(HEDLEY_UNPREDICTABLE) -# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) -#endif - -#if defined(HEDLEY_MALLOC) -# undef HEDLEY_MALLOC -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(malloc) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_MALLOC __attribute__((__malloc__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_MALLOC _Pragma("returns_new_memory") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_MALLOC __declspec(restrict) -#else -# define HEDLEY_MALLOC -#endif - -#if defined(HEDLEY_PURE) -# undef HEDLEY_PURE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(pure) || \ - HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PURE __attribute__((__pure__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_PURE _Pragma("does_not_write_global_data") -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ - ) -# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") -#else -# define HEDLEY_PURE -#endif - -#if defined(HEDLEY_CONST) -# undef HEDLEY_CONST -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(const) || \ - HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_CONST __attribute__((__const__)) -#elif \ - HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_CONST _Pragma("no_side_effect") -#else -# define HEDLEY_CONST HEDLEY_PURE -#endif - -#if defined(HEDLEY_RESTRICT) -# undef HEDLEY_RESTRICT -#endif -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) -# define HEDLEY_RESTRICT restrict -#elif \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - defined(__clang__) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RESTRICT __restrict -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) -# define HEDLEY_RESTRICT _Restrict -#else -# define HEDLEY_RESTRICT -#endif - -#if defined(HEDLEY_INLINE) -# undef HEDLEY_INLINE -#endif -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - (defined(__cplusplus) && (__cplusplus >= 199711L)) -# define HEDLEY_INLINE inline -#elif \ - defined(HEDLEY_GCC_VERSION) || \ - HEDLEY_ARM_VERSION_CHECK(6,2,0) -# define HEDLEY_INLINE __inline__ -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_INLINE __inline -#else -# define HEDLEY_INLINE -#endif - -#if defined(HEDLEY_ALWAYS_INLINE) -# undef HEDLEY_ALWAYS_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(always_inline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ALWAYS_INLINE __forceinline -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ - ) -# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") -#else -# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE -#endif - -#if defined(HEDLEY_NEVER_INLINE) -# undef HEDLEY_NEVER_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(noinline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) -# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NEVER_INLINE _Pragma("inline=never") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NEVER_INLINE __attribute((noinline)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#else -# define HEDLEY_NEVER_INLINE -#endif - -#if defined(HEDLEY_PRIVATE) -# undef HEDLEY_PRIVATE -#endif -#if defined(HEDLEY_PUBLIC) -# undef HEDLEY_PUBLIC -#endif -#if defined(HEDLEY_IMPORT) -# undef HEDLEY_IMPORT -#endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC __declspec(dllexport) -# define HEDLEY_IMPORT __declspec(dllimport) -#else -# if \ - HEDLEY_HAS_ATTRIBUTE(visibility) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - ( \ - defined(__TI_EABI__) && \ - ( \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ - ) \ - ) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) -# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) -# else -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC -# endif -# define HEDLEY_IMPORT extern -#endif - -#if defined(HEDLEY_NO_THROW) -# undef HEDLEY_NO_THROW -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nothrow) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_THROW __attribute__((__nothrow__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NO_THROW __declspec(nothrow) -#else -# define HEDLEY_NO_THROW -#endif - -#if defined(HEDLEY_FALL_THROUGH) -# undef HEDLEY_FALL_THROUGH -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_FALL_THROUGH -#elif \ - HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) -#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) -#elif defined(__fallthrough) /* SAL */ -# define HEDLEY_FALL_THROUGH __fallthrough -#else -# define HEDLEY_FALL_THROUGH -#endif - -#if defined(HEDLEY_RETURNS_NON_NULL) -# undef HEDLEY_RETURNS_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) -#elif defined(_Ret_notnull_) /* SAL */ -# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ -#else -# define HEDLEY_RETURNS_NON_NULL -#endif - -#if defined(HEDLEY_ARRAY_PARAM) -# undef HEDLEY_ARRAY_PARAM -#endif -#if \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - !defined(__STDC_NO_VLA__) && \ - !defined(__cplusplus) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_ARRAY_PARAM(name) (name) -#else -# define 
HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(HEDLEY_IS_CONSTANT) -# undef HEDLEY_IS_CONSTANT -#endif -#if defined(HEDLEY_REQUIRE_CONSTEXPR) -# undef HEDLEY_REQUIRE_CONSTEXPR -#endif -/* HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_IS_CONSTEXPR_) -# undef HEDLEY_IS_CONSTEXPR_ -#endif -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -# if \ - HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) -# endif -# elif \ - ( \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(HEDLEY_SUNPRO_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION)) || \ - (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) -# endif -# elif \ - defined(HEDLEY_GCC_VERSION) || \ - defined(HEDLEY_INTEL_VERSION) || \ - defined(HEDLEY_TINYC_VERSION) || \ - defined(HEDLEY_TI_ARMCL_VERSION) || \ - HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ - defined(HEDLEY_TI_CL2000_VERSION) || \ - defined(HEDLEY_TI_CL6X_VERSION) || \ - defined(HEDLEY_TI_CL7X_VERSION) || \ - defined(HEDLEY_TI_CLPRU_VERSION) || \ - defined(__clang__) -# define HEDLEY_IS_CONSTEXPR_(expr) ( \ - sizeof(void) != \ - sizeof(*( \ - 1 ? \ - ((void*) ((expr) * 0L) ) : \ - ((struct { char v[sizeof(void) * 2]; } *) 1) \ - ) \ - ) \ - ) -# endif -#endif -#if defined(HEDLEY_IS_CONSTEXPR_) -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) -#else -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) (0) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(HEDLEY_BEGIN_C_DECLS) -# undef HEDLEY_BEGIN_C_DECLS -#endif -#if defined(HEDLEY_END_C_DECLS) -# undef HEDLEY_END_C_DECLS -#endif -#if defined(HEDLEY_C_DECL) -# undef HEDLEY_C_DECL -#endif -#if defined(__cplusplus) -# define HEDLEY_BEGIN_C_DECLS extern "C" { -# define HEDLEY_END_C_DECLS } -# define HEDLEY_C_DECL extern "C" -#else -# define HEDLEY_BEGIN_C_DECLS -# define HEDLEY_END_C_DECLS -# define HEDLEY_C_DECL -#endif - -#if defined(HEDLEY_STATIC_ASSERT) -# undef HEDLEY_STATIC_ASSERT -#endif -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) -# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#else -# define HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(HEDLEY_NULL) -# undef HEDLEY_NULL -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) -# elif defined(NULL) -# define HEDLEY_NULL NULL -# else -# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) -# endif -#elif defined(NULL) -# define HEDLEY_NULL NULL -#else -# define HEDLEY_NULL ((void*) 0) -#endif - -#if defined(HEDLEY_MESSAGE) -# undef HEDLEY_MESSAGE -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_MESSAGE(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(message msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_WARNING) -# undef HEDLEY_WARNING -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_WARNING(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(clang warning msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_REQUIRE) -# undef HEDLEY_REQUIRE -#endif -#if defined(HEDLEY_REQUIRE_MSG) -# undef HEDLEY_REQUIRE_MSG -#endif -#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) -# if HEDLEY_HAS_WARNING("-Wgcc-compat") -# define HEDLEY_REQUIRE(expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# define HEDLEY_REQUIRE_MSG(expr,msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) -# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) -# endif -#else -# define HEDLEY_REQUIRE(expr) -# define HEDLEY_REQUIRE_MSG(expr,msg) -#endif - -#if defined(HEDLEY_FLAGS) -# undef HEDLEY_FLAGS -#endif -#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) -# define HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else -# define HEDLEY_FLAGS -#endif - -#if defined(HEDLEY_FLAGS_CAST) -# undef HEDLEY_FLAGS_CAST -#endif -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) -# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)") \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(HEDLEY_EMPTY_BASES) -# undef HEDLEY_EMPTY_BASES -#endif -#if \ - (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else -# define HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. */ - -#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) -# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) -#else -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_BUILTIN) -# undef HEDLEY_CLANG_HAS_BUILTIN -#endif -#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) - -#if defined(HEDLEY_CLANG_HAS_FEATURE) -# undef HEDLEY_CLANG_HAS_FEATURE -#endif -#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) - -#if defined(HEDLEY_CLANG_HAS_EXTENSION) -# undef HEDLEY_CLANG_HAS_EXTENSION -#endif -#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) - -#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_WARNING) -# undef HEDLEY_CLANG_HAS_WARNING -#endif -#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ -/* :: End simde/hedley.h :: */ - -#define SIMDE_VERSION_MAJOR 0 -#define SIMDE_VERSION_MINOR 8 -#define SIMDE_VERSION_MICRO 0 -#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) -// Also update meson.build in the root directory of the repository - -#include -#include - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin 
simde/simde-detect-clang.h :: */ -/* Detect Clang Version - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -/* This file was originally part of SIMDe - * (). You're free to do with it as - * you please, but I do have a few small requests: - * - * * If you make improvements, please submit them back to SIMDe - * (at ) so others can - * benefit from them. - * * Please keep a link to SIMDe intact so people know where to submit - * improvements. - * * If you expose it publicly, please change the SIMDE_ prefix to - * something specific to your project. - * - * The version numbers clang exposes (in the ___clang_major__, - * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. - * Vendors such as Apple will define these values to their version - * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but - * __clang_major__ and __clang_minor__ are defined to 4 and 0 - * respectively, instead of 3 and 1. - * - * The solution is *usually* to use clang's feature detection macros - * () - * to determine if the feature you're interested in is available. This - * generally works well, and it should probably be the first thing you - * try. Unfortunately, it's not possible to check for everything. In - * particular, compiler bugs. - * - * This file just uses the feature checking macros to detect features - * added in specific versions of clang to identify which version of - * clang the compiler is based on. - * - * Right now it only goes back to 3.6, but I'm happy to accept patches - * to go back further. And, of course, newer versions are welcome if - * they're not already present, and if you find a way to detect a point - * release that would be great, too! - */ - -#if !defined(SIMDE_DETECT_CLANG_H) -#define SIMDE_DETECT_CLANG_H 1 - -/* Attempt to detect the upstream clang version number. I usually only - * worry about major version numbers (at least for 4.0+), but if you - * need more resolution I'm happy to accept patches that are able to - * detect minor versions as well. That said, you'll probably have a - * hard time with detection since AFAIK most minor releases don't add - * anything we can detect. Updated based on - * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 - * - would welcome patches/updates there as well. 
- */ - -#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -# if __has_attribute(unsafe_buffer_usage) // no new warnings in 17.0 -# define SIMDE_DETECT_CLANG_VERSION 170000 -# elif __has_attribute(nouwtable) // no new warnings in 16.0 -# define SIMDE_DETECT_CLANG_VERSION 160000 -# elif __has_warning("-Warray-parameter") -# define SIMDE_DETECT_CLANG_VERSION 150000 -# elif __has_warning("-Wbitwise-instead-of-logical") -# define SIMDE_DETECT_CLANG_VERSION 140000 -# elif __has_warning("-Waix-compat") -# define SIMDE_DETECT_CLANG_VERSION 130000 -# elif __has_warning("-Wformat-insufficient-args") -# define SIMDE_DETECT_CLANG_VERSION 120000 -# elif __has_warning("-Wimplicit-const-int-float-conversion") -# define SIMDE_DETECT_CLANG_VERSION 110000 -# elif __has_warning("-Wmisleading-indentation") -# define SIMDE_DETECT_CLANG_VERSION 100000 -# elif defined(__FILE_NAME__) -# define SIMDE_DETECT_CLANG_VERSION 90000 -# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) -# define SIMDE_DETECT_CLANG_VERSION 80000 -// For reasons unknown, Xcode 10.3 (Apple LLVM version 10.0.1) is apparently -// based on Clang 7, but does not support the warning we test. -// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and -// https://trac.macports.org/wiki/XcodeVersionInfo. -# elif __has_warning("-Wc++98-compat-extra-semi") || \ - (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) -# define SIMDE_DETECT_CLANG_VERSION 70000 -# elif __has_warning("-Wpragma-pack") -# define SIMDE_DETECT_CLANG_VERSION 60000 -# elif __has_warning("-Wbitfield-enum-conversion") -# define SIMDE_DETECT_CLANG_VERSION 50000 -# elif __has_attribute(diagnose_if) -# define SIMDE_DETECT_CLANG_VERSION 40000 -# elif __has_warning("-Wcomma") -# define SIMDE_DETECT_CLANG_VERSION 39000 -# elif __has_warning("-Wdouble-promotion") -# define SIMDE_DETECT_CLANG_VERSION 38000 -# elif __has_warning("-Wshift-negative-value") -# define SIMDE_DETECT_CLANG_VERSION 37000 -# elif __has_warning("-Wambiguous-ellipsis") -# define SIMDE_DETECT_CLANG_VERSION 36000 -# else -# define SIMDE_DETECT_CLANG_VERSION 1 -# endif -#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ - -/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty - * straightforward; it returns true if the compiler is a derivative - * of clang >= the specified version. - * - * Since this file is often (primarily?) useful for working around bugs - * it is also helpful to have a macro which returns true if only if the - * compiler is a version of clang *older* than the specified version to - * make it a bit easier to ifdef regions to add code for older versions, - * such as pragmas to disable a specific warning. 
[Remainder of the deleted bundled SIMDe amalgamated header omitted; the original line structure of this hunk was lost in extraction. The removed span comprises: the tail of simde/simde-detect-clang.h; simde/simde-arch.h (compile-time architecture and ISA-extension detection macros, SIMDE_ARCH_*, covering x86/AMD64 with SSE/AVX/AVX-512, ARM/AArch64 with NEON/SVE, POWER/AltiVec, MIPS, SPARC, z/Architecture, WebAssembly SIMD, LoongArch, and others); simde/simde-diagnostic.h (compiler warning-suppression macros and the SIMDE_DISABLE_UNWANTED_DIAGNOSTICS bundle); simde/simde-features.h (mapping of detected ISA extensions to SIMDE_*_NATIVE defines, inclusion of the matching intrinsics headers, SIMDE_NATURAL_VECTOR_SIZE, and the SIMDE_*_ENABLE_NATIVE_ALIASES toggles); and the opening of simde/simde-math.h (libm/built-in math-function detection and optional SLEEF support). All of this vendored third-party code is deleted in this change.]
*/ - #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) - #define SIMDE_MATH_HAVE_MATH_H - #elif defined(__cplusplus) - #define SIMDE_MATH_HAVE_CMATH - #endif -#elif defined(__has_include) - #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() - #define SIMDE_MATH_HAVE_CMATH - #include - #elif __has_include() - #define SIMDE_MATH_HAVE_MATH_H - #include - #elif !defined(SIMDE_MATH_NO_LIBM) - #define SIMDE_MATH_NO_LIBM - #endif -#elif !defined(SIMDE_MATH_NO_LIBM) - #if defined(__cplusplus) && (__cplusplus >= 201103L) - #define SIMDE_MATH_HAVE_CMATH - HEDLEY_DIAGNOSTIC_PUSH - #if defined(HEDLEY_MSVC_VERSION) - /* VS 14 emits this diagnostic about noexcept being used on a - * function, which we can't do anything about. */ - #pragma warning(disable:4996) - #endif - #include - HEDLEY_DIAGNOSTIC_POP - #else - #define SIMDE_MATH_HAVE_MATH_H - #include - #endif -#endif - -#if !defined(SIMDE_MATH_INFINITY) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_INFINITY (__builtin_inf()) - #elif defined(INFINITY) - #define SIMDE_MATH_INFINITY INFINITY - #endif -#endif - -#if !defined(SIMDE_INFINITYF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inff) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_INFINITYF (__builtin_inff()) - #elif defined(INFINITYF) - #define SIMDE_MATH_INFINITYF INFINITYF - #elif defined(SIMDE_MATH_INFINITY) - #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) - #endif -#endif - -#if !defined(SIMDE_MATH_NAN) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nan) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_NAN (__builtin_nan("")) - #elif defined(NAN) - #define SIMDE_MATH_NAN NAN - #endif -#endif - -#if !defined(SIMDE_NANF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_NANF (__builtin_nanf("")) - #elif defined(NANF) - #define SIMDE_MATH_NANF NANF - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) - #endif -#endif - -#if !defined(SIMDE_MATH_PI) - #if defined(M_PI) - #define SIMDE_MATH_PI M_PI - #else - #define SIMDE_MATH_PI 3.14159265358979323846 - #endif -#endif - -#if !defined(SIMDE_MATH_PIF) - #if defined(M_PI) - #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) - #else - #define SIMDE_MATH_PIF 3.14159265358979323846f - #endif -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180) - #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180F) - #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f -#endif - -#if !defined(SIMDE_MATH_180_OVER_PI) - #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 -#endif - -#if !defined(SIMDE_MATH_180_OVER_PIF) - #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f -#endif - -#if 
!defined(SIMDE_MATH_FLT_MIN) - #if defined(__FLT_MIN__) - #define SIMDE_MATH_FLT_MIN __FLT_MIN__ - #else - #if !defined(FLT_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MIN FLT_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_FLT_MAX) - #if defined(__FLT_MAX__) - #define SIMDE_MATH_FLT_MAX __FLT_MAX__ - #else - #if !defined(FLT_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MAX FLT_MAX - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MIN) - #if defined(__DBL_MIN__) - #define SIMDE_MATH_DBL_MIN __DBL_MIN__ - #else - #if !defined(DBL_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MIN DBL_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MAX) - #if defined(__DBL_MAX__) - #define SIMDE_MATH_DBL_MAX __DBL_MAX__ - #else - #if !defined(DBL_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MAX DBL_MAX - #endif -#endif - -/*** Classification macros from C99 ***/ - -#if !defined(simde_math_isinf) - #if SIMDE_MATH_BUILTIN_LIBM(isinf) - #define simde_math_isinf(v) __builtin_isinf(v) - #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isinf(v) isinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinf(v) std::isinf(v) - #endif -#endif - -#if !defined(simde_math_isinff) - #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define simde_math_isinff(v) __builtin_isinff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinff(v) std::isinf(v) - #elif defined(simde_math_isinf) - #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnan) - #if SIMDE_MATH_BUILTIN_LIBM(isnan) - #define simde_math_isnan(v) __builtin_isnan(v) - #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnan(v) isnan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnan(v) std::isnan(v) - #endif -#endif - -#if !defined(simde_math_isnanf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ - #define simde_math_isnanf(v) __builtin_isnanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnanf(v) std::isnan(v) - #elif defined(simde_math_isnan) - #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnormal) - #if SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormal(v) __builtin_isnormal(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormal(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormal(v) std::isnormal(v) - #endif -#endif - -#if !defined(simde_math_isnormalf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) - #define simde_math_isnormalf(v) __builtin_isnormalf(v) - #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormalf(v) __builtin_isnormal(v) - #elif defined(isnormalf) - #define simde_math_isnormalf(v) isnormalf(v) - #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormalf(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormalf(v) std::isnormal(v) - #elif defined(simde_math_isnormal) - #define simde_math_isnormalf(v) 
simde_math_isnormal(v) - #endif -#endif - -#if !defined(simde_math_issubnormalf) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) - #endif -#endif - -#if !defined(simde_math_issubnormal) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormal(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) - #endif -#endif - -#if defined(FP_NAN) - #define SIMDE_MATH_FP_NAN FP_NAN -#else - #define SIMDE_MATH_FP_NAN 0 -#endif -#if defined(FP_INFINITE) - #define SIMDE_MATH_FP_INFINITE FP_INFINITE -#else - #define SIMDE_MATH_FP_INFINITE 1 -#endif -#if defined(FP_ZERO) - #define SIMDE_MATH_FP_ZERO FP_ZERO -#else - #define SIMDE_MATH_FP_ZERO 2 -#endif -#if defined(FP_SUBNORMAL) - #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL -#else - #define SIMDE_MATH_FP_SUBNORMAL 3 -#endif -#if defined(FP_NORMAL) - #define SIMDE_MATH_FP_NORMAL FP_NORMAL -#else - #define SIMDE_MATH_FP_NORMAL 4 -#endif - -static HEDLEY_INLINE -int -simde_math_fpclassifyf(float v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0f) ? SIMDE_MATH_FP_ZERO : - simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -static HEDLEY_INLINE -int -simde_math_fpclassify(double v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0) ? SIMDE_MATH_FP_ZERO : - simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -#define SIMDE_MATH_FP_QNAN 0x01 -#define SIMDE_MATH_FP_PZERO 0x02 -#define SIMDE_MATH_FP_NZERO 0x04 -#define SIMDE_MATH_FP_PINF 0x08 -#define SIMDE_MATH_FP_NINF 0x10 -#define SIMDE_MATH_FP_DENORMAL 0x20 -#define SIMDE_MATH_FP_NEGATIVE 0x40 -#define SIMDE_MATH_FP_SNAN 0x80 - -static HEDLEY_INLINE -uint8_t -simde_math_fpclassf(float v, const int imm8) { - union { - float f; - uint32_t u; - } fu; - fu.f = v; - uint32_t bits = fu.u; - uint8_t NegNum = (bits >> 31) & 1; - uint32_t const ExpMask = 0x3F800000; // [30:23] - uint32_t const MantMask = 0x007FFFFF; // [22:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 22) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -static HEDLEY_INLINE -uint8_t -simde_math_fpclass(double v, const int imm8) { - union { - double d; - uint64_t u; - } du; - du.d = v; - uint64_t bits = du.u; - uint8_t NegNum = (bits >> 63) & 1; - uint64_t const ExpMask = 0x3FF0000000000000; // [62:52] - uint64_t const MantMask = 0x000FFFFFFFFFFFFF; // [51:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 51) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -/*** Manipulation functions ***/ - -#if !defined(simde_math_nextafter) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafter(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafter(x, y) 
nextafter(x, y) - #endif -#endif - -#if !defined(simde_math_nextafterf) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafterf(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafterf(x, y) nextafterf(x, y) - #endif -#endif - -/*** Functions from C99 ***/ - -#if !defined(simde_math_abs) - #if SIMDE_MATH_BUILTIN_LIBM(abs) - #define simde_math_abs(v) __builtin_abs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_abs(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_abs(v) abs(v) - #endif -#endif - -#if !defined(simde_math_labs) - #if SIMDE_MATH_BUILTIN_LIBM(labs) - #define simde_math_labs(v) __builtin_labs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_labs(v) std::labs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_labs(v) labs(v) - #endif -#endif - -#if !defined(simde_math_llabs) - #if SIMDE_MATH_BUILTIN_LIBM(llabs) - #define simde_math_llabs(v) __builtin_llabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_llabs(v) std::llabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_llabs(v) llabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_acos) - #if SIMDE_MATH_BUILTIN_LIBM(acos) - #define simde_math_acos(v) __builtin_acos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acos(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acos(v) acos(v) - #endif -#endif - -#if !defined(simde_math_acosf) - #if SIMDE_MATH_BUILTIN_LIBM(acosf) - #define simde_math_acosf(v) __builtin_acosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosf(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosf(v) acosf(v) - #endif -#endif - -#if !defined(simde_math_acosh) - #if SIMDE_MATH_BUILTIN_LIBM(acosh) - #define simde_math_acosh(v) __builtin_acosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosh(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosh(v) acosh(v) - #endif -#endif - -#if !defined(simde_math_acoshf) - #if SIMDE_MATH_BUILTIN_LIBM(acoshf) - #define simde_math_acoshf(v) __builtin_acoshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acoshf(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acoshf(v) acoshf(v) - #endif -#endif - -#if !defined(simde_math_asin) - #if SIMDE_MATH_BUILTIN_LIBM(asin) - #define simde_math_asin(v) __builtin_asin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asin(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asin(v) asin(v) - #endif -#endif - -#if !defined(simde_math_asinf) - #if SIMDE_MATH_BUILTIN_LIBM(asinf) - #define simde_math_asinf(v) __builtin_asinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinf(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinf(v) asinf(v) - #endif -#endif - -#if 
!defined(simde_math_asinh) - #if SIMDE_MATH_BUILTIN_LIBM(asinh) - #define simde_math_asinh(v) __builtin_asinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinh(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinh(v) asinh(v) - #endif -#endif - -#if !defined(simde_math_asinhf) - #if SIMDE_MATH_BUILTIN_LIBM(asinhf) - #define simde_math_asinhf(v) __builtin_asinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinhf(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinhf(v) asinhf(v) - #endif -#endif - -#if !defined(simde_math_atan) - #if SIMDE_MATH_BUILTIN_LIBM(atan) - #define simde_math_atan(v) __builtin_atan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan(v) atan(v) - #endif -#endif - -#if !defined(simde_math_atan2) - #if SIMDE_MATH_BUILTIN_LIBM(atan2) - #define simde_math_atan2(y, x) __builtin_atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2(y, x) atan2(y, x) - #endif -#endif - -#if !defined(simde_math_atan2f) - #if SIMDE_MATH_BUILTIN_LIBM(atan2f) - #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2f(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2f(y, x) atan2f(y, x) - #endif -#endif - -#if !defined(simde_math_atanf) - #if SIMDE_MATH_BUILTIN_LIBM(atanf) - #define simde_math_atanf(v) __builtin_atanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanf(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanf(v) atanf(v) - #endif -#endif - -#if !defined(simde_math_atanh) - #if SIMDE_MATH_BUILTIN_LIBM(atanh) - #define simde_math_atanh(v) __builtin_atanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanh(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanh(v) atanh(v) - #endif -#endif - -#if !defined(simde_math_atanhf) - #if SIMDE_MATH_BUILTIN_LIBM(atanhf) - #define simde_math_atanhf(v) __builtin_atanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanhf(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanhf(v) atanhf(v) - #endif -#endif - -#if !defined(simde_math_cbrt) - #if SIMDE_MATH_BUILTIN_LIBM(cbrt) - #define simde_math_cbrt(v) __builtin_cbrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrt(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrt(v) cbrt(v) - #endif -#endif - -#if !defined(simde_math_cbrtf) - #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) - #define simde_math_cbrtf(v) __builtin_cbrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrtf(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrtf(v) cbrtf(v) - #endif -#endif - -#if !defined(simde_math_ceil) - #if SIMDE_MATH_BUILTIN_LIBM(ceil) - #define simde_math_ceil(v) __builtin_ceil(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceil(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_ceil(v) ceil(v) - #endif -#endif - -#if !defined(simde_math_ceilf) - #if SIMDE_MATH_BUILTIN_LIBM(ceilf) - #define simde_math_ceilf(v) __builtin_ceilf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceilf(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) 
- #define simde_math_ceilf(v) ceilf(v) - #endif -#endif - -#if !defined(simde_math_copysign) - #if SIMDE_MATH_BUILTIN_LIBM(copysign) - #define simde_math_copysign(x, y) __builtin_copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysign(x, y) std::copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysign(x, y) copysign(x, y) - #endif -#endif - -#if !defined(simde_math_copysignf) - #if SIMDE_MATH_BUILTIN_LIBM(copysignf) - #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysignf(x, y) std::copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysignf(x, y) copysignf(x, y) - #endif -#endif - -#if !defined(simde_math_signbit) - #if SIMDE_MATH_BUILTIN_LIBM(signbit) - #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - #define simde_math_signbit(x) __builtin_signbit(x) - #else - #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) - #endif - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_signbit(x) std::signbit(x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_signbit(x) signbit(x) - #endif -#endif - -#if !defined(simde_math_cos) - #if SIMDE_MATH_BUILTIN_LIBM(cos) - #define simde_math_cos(v) __builtin_cos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cos(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cos(v) cos(v) - #endif -#endif - -#if !defined(simde_math_cosf) - #if defined(SIMDE_MATH_SLEEF_ENABLE) - #if SIMDE_ACCURACY_PREFERENCE < 1 - #define simde_math_cosf(v) Sleef_cosf_u35(v) - #else - #define simde_math_cosf(v) Sleef_cosf_u10(v) - #endif - #elif SIMDE_MATH_BUILTIN_LIBM(cosf) - #define simde_math_cosf(v) __builtin_cosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosf(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosf(v) cosf(v) - #endif -#endif - -#if !defined(simde_math_cosh) - #if SIMDE_MATH_BUILTIN_LIBM(cosh) - #define simde_math_cosh(v) __builtin_cosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosh(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosh(v) cosh(v) - #endif -#endif - -#if !defined(simde_math_coshf) - #if SIMDE_MATH_BUILTIN_LIBM(coshf) - #define simde_math_coshf(v) __builtin_coshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_coshf(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_coshf(v) coshf(v) - #endif -#endif - -#if !defined(simde_math_erf) - #if SIMDE_MATH_BUILTIN_LIBM(erf) - #define simde_math_erf(v) __builtin_erf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erf(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erf(v) erf(v) - #endif -#endif - -#if !defined(simde_math_erff) - #if SIMDE_MATH_BUILTIN_LIBM(erff) - #define simde_math_erff(v) __builtin_erff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erff(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erff(v) erff(v) - #endif -#endif - -#if !defined(simde_math_erfc) - #if SIMDE_MATH_BUILTIN_LIBM(erfc) - #define simde_math_erfc(v) __builtin_erfc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfc(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfc(v) erfc(v) - #endif -#endif - -#if !defined(simde_math_erfcf) - #if SIMDE_MATH_BUILTIN_LIBM(erfcf) - #define simde_math_erfcf(v) 
__builtin_erfcf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfcf(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfcf(v) erfcf(v) - #endif -#endif - -#if !defined(simde_math_exp) - #if SIMDE_MATH_BUILTIN_LIBM(exp) - #define simde_math_exp(v) __builtin_exp(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp(v) exp(v) - #endif -#endif - -#if !defined(simde_math_expf) - #if SIMDE_MATH_BUILTIN_LIBM(expf) - #define simde_math_expf(v) __builtin_expf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expf(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expf(v) expf(v) - #endif -#endif - -#if !defined(simde_math_expm1) - #if SIMDE_MATH_BUILTIN_LIBM(expm1) - #define simde_math_expm1(v) __builtin_expm1(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1(v) expm1(v) - #endif -#endif - -#if !defined(simde_math_expm1f) - #if SIMDE_MATH_BUILTIN_LIBM(expm1f) - #define simde_math_expm1f(v) __builtin_expm1f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1f(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1f(v) expm1f(v) - #endif -#endif - -#if !defined(simde_math_exp2) - #if SIMDE_MATH_BUILTIN_LIBM(exp2) - #define simde_math_exp2(v) __builtin_exp2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2(v) exp2(v) - #endif -#endif - -#if !defined(simde_math_exp2f) - #if SIMDE_MATH_BUILTIN_LIBM(exp2f) - #define simde_math_exp2f(v) __builtin_exp2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2f(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2f(v) exp2f(v) - #endif -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10(v) __builtin_exp10(v) -#else -# define simde_math_exp10(v) pow(10.0, (v)) -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10f(v) __builtin_exp10f(v) -#else -# define simde_math_exp10f(v) powf(10.0f, (v)) -#endif - -#if !defined(simde_math_fabs) - #if SIMDE_MATH_BUILTIN_LIBM(fabs) - #define simde_math_fabs(v) __builtin_fabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabs(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabs(v) fabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_floor) - #if SIMDE_MATH_BUILTIN_LIBM(floor) - #define simde_math_floor(v) __builtin_floor(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floor(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floor(v) floor(v) - #endif -#endif - -#if !defined(simde_math_floorf) - #if SIMDE_MATH_BUILTIN_LIBM(floorf) - #define simde_math_floorf(v) __builtin_floorf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floorf(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floorf(v) floorf(v) - #endif -#endif - -#if 
!defined(simde_math_fma) - #if SIMDE_MATH_BUILTIN_LIBM(fma) - #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fma(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fma(x, y, z) fma(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmaf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaf) - #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaf(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaf(x, y, z) fmaf(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmax) - #if SIMDE_MATH_BUILTIN_LIBM(fmax) - #define simde_math_fmax(x, y) __builtin_fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmax(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmax(x, y) fmax(x, y) - #endif -#endif - -#if !defined(simde_math_fmaxf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) - #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaxf(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaxf(x, y) fmaxf(x, y) - #endif -#endif - -#if !defined(simde_math_hypot) - #if SIMDE_MATH_BUILTIN_LIBM(hypot) - #define simde_math_hypot(y, x) __builtin_hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypot(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypot(y, x) hypot(y, x) - #endif -#endif - -#if !defined(simde_math_hypotf) - #if SIMDE_MATH_BUILTIN_LIBM(hypotf) - #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypotf(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypotf(y, x) hypotf(y, x) - #endif -#endif - -#if !defined(simde_math_log) - #if SIMDE_MATH_BUILTIN_LIBM(log) - #define simde_math_log(v) __builtin_log(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log(v) log(v) - #endif -#endif - -#if !defined(simde_math_logf) - #if SIMDE_MATH_BUILTIN_LIBM(logf) - #define simde_math_logf(v) __builtin_logf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logf(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logf(v) logf(v) - #endif -#endif - -#if !defined(simde_math_logb) - #if SIMDE_MATH_BUILTIN_LIBM(logb) - #define simde_math_logb(v) __builtin_logb(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logb(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logb(v) logb(v) - #endif -#endif - -#if !defined(simde_math_logbf) - #if SIMDE_MATH_BUILTIN_LIBM(logbf) - #define simde_math_logbf(v) __builtin_logbf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logbf(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logbf(v) logbf(v) - #endif -#endif - -#if !defined(simde_math_log1p) - #if SIMDE_MATH_BUILTIN_LIBM(log1p) - #define simde_math_log1p(v) __builtin_log1p(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log1p(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1p(v) log1p(v) - #endif -#endif - -#if !defined(simde_math_log1pf) - #if SIMDE_MATH_BUILTIN_LIBM(log1pf) - #define simde_math_log1pf(v) __builtin_log1pf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define 
simde_math_log1pf(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1pf(v) log1pf(v) - #endif -#endif - -#if !defined(simde_math_log2) - #if SIMDE_MATH_BUILTIN_LIBM(log2) - #define simde_math_log2(v) __builtin_log2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2(v) log2(v) - #endif -#endif - -#if !defined(simde_math_log2f) - #if SIMDE_MATH_BUILTIN_LIBM(log2f) - #define simde_math_log2f(v) __builtin_log2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2f(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2f(v) log2f(v) - #endif -#endif - -#if !defined(simde_math_log10) - #if SIMDE_MATH_BUILTIN_LIBM(log10) - #define simde_math_log10(v) __builtin_log10(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10(v) log10(v) - #endif -#endif - -#if !defined(simde_math_log10f) - #if SIMDE_MATH_BUILTIN_LIBM(log10f) - #define simde_math_log10f(v) __builtin_log10f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10f(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10f(v) log10f(v) - #endif -#endif - -#if !defined(simde_math_modf) - #if SIMDE_MATH_BUILTIN_LIBM(modf) - #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modf(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modf(x, iptr) modf(x, iptr) - #endif -#endif - -#if !defined(simde_math_modff) - #if SIMDE_MATH_BUILTIN_LIBM(modff) - #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modff(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modff(x, iptr) modff(x, iptr) - #endif -#endif - -#if !defined(simde_math_nearbyint) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) - #define simde_math_nearbyint(v) __builtin_nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyint(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyint(v) nearbyint(v) - #endif -#endif - -#if !defined(simde_math_nearbyintf) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) - #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyintf(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyintf(v) nearbyintf(v) - #endif -#endif - -#if !defined(simde_math_pow) - #if SIMDE_MATH_BUILTIN_LIBM(pow) - #define simde_math_pow(y, x) __builtin_pow(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_pow(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_pow(y, x) pow(y, x) - #endif -#endif - -#if !defined(simde_math_powf) - #if SIMDE_MATH_BUILTIN_LIBM(powf) - #define simde_math_powf(y, x) __builtin_powf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_powf(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_powf(y, x) powf(y, x) - #endif -#endif - -#if !defined(simde_math_rint) - #if SIMDE_MATH_BUILTIN_LIBM(rint) - #define simde_math_rint(v) __builtin_rint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rint(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rint(v) rint(v) - #endif 
-#endif - -#if !defined(simde_math_rintf) - #if SIMDE_MATH_BUILTIN_LIBM(rintf) - #define simde_math_rintf(v) __builtin_rintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rintf(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rintf(v) rintf(v) - #endif -#endif - -#if !defined(simde_math_round) - #if SIMDE_MATH_BUILTIN_LIBM(round) - #define simde_math_round(v) __builtin_round(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_round(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_round(v) round(v) - #endif -#endif - -#if !defined(simde_math_roundf) - #if SIMDE_MATH_BUILTIN_LIBM(roundf) - #define simde_math_roundf(v) __builtin_roundf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_roundf(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_roundf(v) roundf(v) - #endif -#endif - -#if !defined(simde_math_roundeven) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundeven(v) __builtin_roundeven(v) - #elif defined(simde_math_round) && defined(simde_math_fabs) - static HEDLEY_INLINE - double - simde_math_roundeven(double v) { - double rounded = simde_math_round(v); - double diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundeven simde_math_roundeven - #endif -#endif - -#if !defined(simde_math_roundevenf) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundevenf(v) __builtin_roundevenf(v) - #elif defined(simde_math_roundf) && defined(simde_math_fabsf) - static HEDLEY_INLINE - float - simde_math_roundevenf(float v) { - float rounded = simde_math_roundf(v); - float diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundevenf simde_math_roundevenf - #endif -#endif - -#if !defined(simde_math_sin) - #if SIMDE_MATH_BUILTIN_LIBM(sin) - #define simde_math_sin(v) __builtin_sin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sin(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sin(v) sin(v) - #endif -#endif - -#if !defined(simde_math_sinf) - #if SIMDE_MATH_BUILTIN_LIBM(sinf) - #define simde_math_sinf(v) __builtin_sinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinf(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinf(v) sinf(v) - #endif -#endif - -#if !defined(simde_math_sinh) - #if SIMDE_MATH_BUILTIN_LIBM(sinh) - #define simde_math_sinh(v) __builtin_sinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinh(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinh(v) sinh(v) - #endif -#endif - -#if !defined(simde_math_sinhf) - #if SIMDE_MATH_BUILTIN_LIBM(sinhf) - #define simde_math_sinhf(v) __builtin_sinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinhf(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinhf(v) sinhf(v) - #endif -#endif - -#if !defined(simde_math_sqrt) - #if SIMDE_MATH_BUILTIN_LIBM(sqrt) - #define simde_math_sqrt(v) __builtin_sqrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrt(v) 
std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrt(v) sqrt(v) - #endif -#endif - -#if !defined(simde_math_sqrtf) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) - #define simde_math_sqrtf(v) __builtin_sqrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtf(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtf(v) sqrtf(v) - #endif -#endif - -#if !defined(simde_math_sqrtl) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtl) - #define simde_math_sqrtl(v) __builtin_sqrtl(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtl(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtl(v) sqrtl(v) - #endif -#endif - -#if !defined(simde_math_tan) - #if SIMDE_MATH_BUILTIN_LIBM(tan) - #define simde_math_tan(v) __builtin_tan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tan(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tan(v) tan(v) - #endif -#endif - -#if !defined(simde_math_tanf) - #if SIMDE_MATH_BUILTIN_LIBM(tanf) - #define simde_math_tanf(v) __builtin_tanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanf(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanf(v) tanf(v) - #endif -#endif - -#if !defined(simde_math_tanh) - #if SIMDE_MATH_BUILTIN_LIBM(tanh) - #define simde_math_tanh(v) __builtin_tanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanh(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanh(v) tanh(v) - #endif -#endif - -#if !defined(simde_math_tanhf) - #if SIMDE_MATH_BUILTIN_LIBM(tanhf) - #define simde_math_tanhf(v) __builtin_tanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanhf(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanhf(v) tanhf(v) - #endif -#endif - -#if !defined(simde_math_trunc) - #if SIMDE_MATH_BUILTIN_LIBM(trunc) - #define simde_math_trunc(v) __builtin_trunc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_trunc(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_trunc(v) trunc(v) - #endif -#endif - -#if !defined(simde_math_truncf) - #if SIMDE_MATH_BUILTIN_LIBM(truncf) - #define simde_math_truncf(v) __builtin_truncf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_truncf(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_truncf(v) truncf(v) - #endif -#endif - -/*** Comparison macros (which don't raise invalid errors) ***/ - -#if defined(isunordered) - #define simde_math_isunordered(x, y) isunordered(x, y) -#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) - #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) -#else - static HEDLEY_INLINE - int simde_math_isunordered(double x, double y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunordered simde_math_isunordered - - static HEDLEY_INLINE - int simde_math_isunorderedf(float x, float y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunorderedf simde_math_isunorderedf -#endif -#if !defined(simde_math_isunorderedf) - #define simde_math_isunorderedf simde_math_isunordered -#endif - -/*** Additional functions not in libm ***/ - -#if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) - static HEDLEY_INLINE - double - simde_math_cdfnorm(double x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const double a1 = 0.254829592; - static const double a2 = -0.284496736; 
- static const double a3 = 1.421413741; - static const double a4 = -1.453152027; - static const double a5 = 1.061405429; - static const double p = 0.3275911; - - const int sign = x < 0; - x = simde_math_fabs(x) / simde_math_sqrt(2.0); - - /* A&S formula 7.1.26 */ - double t = 1.0 / (1.0 + p * x); - double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); - - return 0.5 * (1.0 + (sign ? -y : y)); - } - #define simde_math_cdfnorm simde_math_cdfnorm -#endif - -#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) - static HEDLEY_INLINE - float - simde_math_cdfnormf(float x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const float a1 = 0.254829592f; - static const float a2 = -0.284496736f; - static const float a3 = 1.421413741f; - static const float a4 = -1.453152027f; - static const float a5 = 1.061405429f; - static const float p = 0.3275911f; - - const int sign = x < 0; - x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); - - /* A&S formula 7.1.26 */ - float t = 1.0f / (1.0f + p * x); - float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); - - return 0.5f * (1.0f + (sign ? -y : y)); - } - #define simde_math_cdfnormf simde_math_cdfnormf -#endif - -#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) - /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ - static HEDLEY_INLINE - double - simde_math_cdfnorminv(double p) { - static const double a[6] = { - -3.969683028665376e+01, - 2.209460984245205e+02, - -2.759285104469687e+02, - 1.383577518672690e+02, - -3.066479806614716e+01, - 2.506628277459239e+00 - }; - - static const double b[5] = { - -5.447609879822406e+01, - 1.615858368580409e+02, - -1.556989798598866e+02, - 6.680131188771972e+01, - -1.328068155288572e+01 - }; - - static const double c[6] = { - -7.784894002430293e-03, - -3.223964580411365e-01, - -2.400758277161838e+00, - -2.549732539343734e+00, - 4.374664141464968e+00, - 2.938163982698783e+00 - }; - - static const double d[4] = { - 7.784695709041462e-03, - 3.224671290700398e-01, - 2.445134137142996e+00, - 3.754408661907416e+00 - }; - - static const double low = 0.02425; - static const double high = 0.97575; - double q, r; - - if (p < 0 || p > 1) { - return 0.0; - } else if (p == 0) { - return -SIMDE_MATH_INFINITY; - } else if (p == 1) { - return SIMDE_MATH_INFINITY; - } else if (p < low) { - q = simde_math_sqrt(-2.0 * simde_math_log(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } -} -#define simde_math_cdfnorminv simde_math_cdfnorminv -#endif - -#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_cdfnorminvf(float p) { - static const float a[6] = { - -3.969683028665376e+01f, - 2.209460984245205e+02f, - -2.759285104469687e+02f, - 1.383577518672690e+02f, - -3.066479806614716e+01f, - 
2.506628277459239e+00f - }; - static const float b[5] = { - -5.447609879822406e+01f, - 1.615858368580409e+02f, - -1.556989798598866e+02f, - 6.680131188771972e+01f, - -1.328068155288572e+01f - }; - static const float c[6] = { - -7.784894002430293e-03f, - -3.223964580411365e-01f, - -2.400758277161838e+00f, - -2.549732539343734e+00f, - 4.374664141464968e+00f, - 2.938163982698783e+00f - }; - static const float d[4] = { - 7.784695709041462e-03f, - 3.224671290700398e-01f, - 2.445134137142996e+00f, - 3.754408661907416e+00f - }; - static const float low = 0.02425f; - static const float high = 0.97575f; - float q, r; - - if (p < 0 || p > 1) { - return 0.0f; - } else if (p == 0) { - return -SIMDE_MATH_INFINITYF; - } else if (p == 1) { - return SIMDE_MATH_INFINITYF; - } else if (p < low) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5f; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } - } - #define simde_math_cdfnorminvf simde_math_cdfnorminvf -#endif - -#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfinv(double x) { - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c - * - * The original answer on SO uses a constant of 0.147, but in my - * testing 0.14829094707965850830078125 gives a lower average absolute error - * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). - * That said, if your goal is to minimize the *maximum* absolute - * error, 0.15449436008930206298828125 provides significantly better - * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); - } - #define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfinvf(float x) { - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); - } - #define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfcinv(double x) { - if(x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - static const double p[6] = { - 0.1550470003116, - 1.382719649631, - 0.690969348887, - -1.128081391617, - 0.680544246825, - -0.16444156791 - }; - static const double q[3] = { - 0.155024849822, - 1.385228141995, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - static const double p[4] = { - 0.00980456202915, - 0.363667889171, - 0.97302949837, - -0.5374947401 - }; - static const double q[3] = { - 0.00980451277802, - 0.363699971544, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } - } - - #define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfcinvf(float x) { - if(x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = { - 0.1550470003116f, - 1.382719649631f, - 0.690969348887f, - -1.128081391617f, - 0.680544246825f - -0.164441567910f - }; - static const float q[3] = { - 0.155024849822f, - 1.385228141995f, - 1.000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = { - 0.00980456202915f, - 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f - }; - static const float q[3] = { - 0.00980451277802f, - 0.36369997154400f, - 1.00000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; - } - } - - #define simde_math_erfcinvf simde_math_erfcinvf -#endif - -static HEDLEY_INLINE -double -simde_math_rad2deg(double radians) { - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE -float -simde_math_rad2degf(float radians) { - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE -double -simde_math_deg2rad(double degrees) { - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE -float -simde_math_deg2radf(float degrees) { - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE -int8_t -simde_math_adds_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); - #else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_adds_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_adds_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_adds_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_adds_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); - #else - uint8_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_adds_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); - #else - uint16_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_adds_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); - #else - uint32_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_adds_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); - #else - uint64_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -int8_t -simde_math_subs_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); - #else - uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - case 32: result = func_name(__VA_ARGS__, 32); break; \ - case 33: result = func_name(__VA_ARGS__, 33); break; \ - case 34: result = func_name(__VA_ARGS__, 34); break; \ - case 35: result = func_name(__VA_ARGS__, 35); break; \ - case 36: result = func_name(__VA_ARGS__, 36); break; \ - case 37: result = func_name(__VA_ARGS__, 37); break; \ - case 38: result = func_name(__VA_ARGS__, 38); break; \ - case 39: result = func_name(__VA_ARGS__, 39); break; \ - case 40: result = func_name(__VA_ARGS__, 40); break; \ - case 41: result = func_name(__VA_ARGS__, 41); break; \ - case 42: result = func_name(__VA_ARGS__, 42); break; \ - case 43: result = func_name(__VA_ARGS__, 43); break; \ - case 44: result = func_name(__VA_ARGS__, 44); break; \ - case 45: result = func_name(__VA_ARGS__, 45); break; \ - case 46: result = func_name(__VA_ARGS__, 46); break; \ - case 47: result = func_name(__VA_ARGS__, 47); break; \ - case 48: result = func_name(__VA_ARGS__, 48); break; \ - case 49: result = func_name(__VA_ARGS__, 49); break; \ - case 50: result = func_name(__VA_ARGS__, 50); break; \ - case 51: result = func_name(__VA_ARGS__, 51); break; \ - case 52: result = func_name(__VA_ARGS__, 52); break; \ - case 53: result = func_name(__VA_ARGS__, 53); break; \ - case 54: result = func_name(__VA_ARGS__, 54); break; \ - case 55: result = func_name(__VA_ARGS__, 55); break; \ - case 56: result = func_name(__VA_ARGS__, 56); break; \ - case 57: result = func_name(__VA_ARGS__, 57); break; \ - case 58: result = func_name(__VA_ARGS__, 58); break; \ - case 59: result = func_name(__VA_ARGS__, 59); break; \ - case 60: result = func_name(__VA_ARGS__, 60); break; \ - case 61: result = func_name(__VA_ARGS__, 61); break; \ - case 62: 
result = func_name(__VA_ARGS__, 62); break; \ - case 63: result = func_name(__VA_ARGS__, 63); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - case 32: func_name(__VA_ARGS__, 32); break; \ - case 33: func_name(__VA_ARGS__, 33); break; \ - case 34: func_name(__VA_ARGS__, 34); break; \ - case 35: func_name(__VA_ARGS__, 35); break; \ - case 36: func_name(__VA_ARGS__, 36); break; \ - case 37: func_name(__VA_ARGS__, 37); break; \ - case 38: func_name(__VA_ARGS__, 38); break; \ 
- case 39: func_name(__VA_ARGS__, 39); break; \ - case 40: func_name(__VA_ARGS__, 40); break; \ - case 41: func_name(__VA_ARGS__, 41); break; \ - case 42: func_name(__VA_ARGS__, 42); break; \ - case 43: func_name(__VA_ARGS__, 43); break; \ - case 44: func_name(__VA_ARGS__, 44); break; \ - case 45: func_name(__VA_ARGS__, 45); break; \ - case 46: func_name(__VA_ARGS__, 46); break; \ - case 47: func_name(__VA_ARGS__, 47); break; \ - case 48: func_name(__VA_ARGS__, 48); break; \ - case 49: func_name(__VA_ARGS__, 49); break; \ - case 50: func_name(__VA_ARGS__, 50); break; \ - case 51: func_name(__VA_ARGS__, 51); break; \ - case 52: func_name(__VA_ARGS__, 52); break; \ - case 53: func_name(__VA_ARGS__, 53); break; \ - case 54: func_name(__VA_ARGS__, 54); break; \ - case 55: func_name(__VA_ARGS__, 55); break; \ - case 56: func_name(__VA_ARGS__, 56); break; \ - case 57: func_name(__VA_ARGS__, 57); break; \ - case 58: func_name(__VA_ARGS__, 58); break; \ - case 59: func_name(__VA_ARGS__, 59); break; \ - case 60: func_name(__VA_ARGS__, 60); break; \ - case 61: func_name(__VA_ARGS__, 61); break; \ - case 62: func_name(__VA_ARGS__, 62); break; \ - case 63: func_name(__VA_ARGS__, 63); break; \ - default: default_case; break; \ - } \ - } while (0) - -HEDLEY_DIAGNOSTIC_POP - -#endif -/* :: End simde/simde-constify.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-align.h :: */ -/* Alignment - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - ********************************************************************** - * - * This is portability layer which should help iron out some - * differences across various compilers, as well as various verisons of - * C and C++. - * - * It was originally developed for SIMD Everywhere - * (), but since its only - * dependency is Hedley (, also CC0) - * it can easily be used in other projects, so please feel free to do - * so. - * - * If you do use this in your project, please keep a link to SIMDe in - * your code to remind you where to report any bugs and/or check for - * updated versions. - * - * # API Overview - * - * The API has several parts, and most macros have a few variations. - * There are APIs for declaring aligned fields/variables, optimization - * hints, and run-time alignment checks. - * - * Briefly, macros ending with "_TO" take numeric values and are great - * when you know the value you would like to use. Macros ending with - * "_LIKE", on the other hand, accept a type and are used when you want - * to use the alignment of a type instead of hardcoding a value. - * - * Documentation for each section of the API is inline. - * - * True to form, MSVC is the main problem and imposes several - * limitations on the effectiveness of the APIs. Detailed descriptions - * of the limitations of each macro are inline, but in general: - * - * * On C11+ or C++11+ code written using this API will work. The - * ASSUME macros may or may not generate a hint to the compiler, but - * that is only an optimization issue and will not actually cause - * failures. - * * If you're using pretty much any compiler other than MSVC, - * everything should basically work as well as in C11/C++11. 
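As a usage illustration of the SIMDE_CONSTIFY_* helpers whose definitions end just above (before the simde-align.h header begins), here is a hedged sketch of how a wrapper turns a runtime argument into the integer constant expression an intrinsic-style callee expects. Both function names below are invented; only the SIMDE_CONSTIFY_4_ macro defined above is assumed.

#include <cstdint>

// Stand-in for an intrinsic whose last argument must be an integer constant
// expression (e.g. a shift/shuffle immediate); the name is ours.
static inline uint32_t rotl_imm(uint32_t v, const int imm) {
    return (v << imm) | (v >> ((32 - imm) & 31));
}

// Wrapper accepting a runtime value: the macro expands to a switch whose cases
// call rotl_imm with the literals 0..3, so every call the compiler sees really
// does receive a constant; out-of-range values fall back to the unrotated input.
static inline uint32_t rotl_dispatch(uint32_t v, int imm) {
    uint32_t r;
    SIMDE_CONSTIFY_4_(rotl_imm, r, v, imm, v);
    return r;
}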
- */ - -#if !defined(SIMDE_ALIGN_H) -#define SIMDE_ALIGN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* I know this seems a little silly, but some non-hosted compilers - * don't have stddef.h, so we try to accomodate them. */ -#if !defined(SIMDE_ALIGN_SIZE_T_) - #if defined(__SIZE_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__SIZE_T_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #else - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #endif -#endif - -#if !defined(SIMDE_ALIGN_INTPTR_T_) - #if defined(__INTPTR_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ - #elif defined(__PTRDIFF_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ - #elif defined(__PTRDIFF_T_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #else - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #endif -#endif - -#if defined(SIMDE_ALIGN_DEBUG) - #if defined(__cplusplus) - #include - #else - #include - #endif -#endif - -/* SIMDE_ALIGN_OF(Type) - * - * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or - * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. - * It isn't defined everywhere (only when the compiler has some alignof- - * like feature we can use to implement it), but it should work in most - * modern compilers, as well as C11 and C++11. - * - * If we can't find an implementation for SIMDE_ALIGN_OF then the macro - * will not be defined, so if you can handle that situation sensibly - * you may need to sprinkle some ifdefs into your code. - */ -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (0 && HEDLEY_HAS_FEATURE(c_alignof)) - #define SIMDE_ALIGN_OF(Type) _Alignof(Type) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) - #define SIMDE_ALIGN_OF(Type) alignof(Type) -#elif \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - defined(__IBM__ALIGNOF__) || \ - defined(__clang__) - #define SIMDE_ALIGN_OF(Type) __alignof__(Type) -#elif \ - HEDLEY_IAR_VERSION_CHECK(8,40,0) - #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(19,0,0) - /* Probably goes back much further, but MS takes down their old docs. - * If you can verify that this works in earlier versions please let - * me know! */ - #define SIMDE_ALIGN_OF(Type) __alignof(Type) -#endif - -/* SIMDE_ALIGN_MAXIMUM: - * - * This is the maximum alignment that the compiler supports. You can - * define the value prior to including SIMDe if necessary, but in that - * case *please* submit an issue so we can add the platform to the - * detection code. - * - * Most compilers are okay with types which are aligned beyond what - * they think is the maximum, as long as the alignment is a power - * of two. 
Older versions of MSVC is the exception, so we need to cap - * the alignment requests at values that the implementation supports. - * - * XL C/C++ will accept values larger than 16 (which is the alignment - * of an AltiVec vector), but will not reliably align to the larger - * value, so so we cap the value at 16 there. - * - * If the compiler accepts any power-of-two value within reason then - * this macro should be left undefined, and the SIMDE_ALIGN_CAP - * macro will just return the value passed to it. */ -#if !defined(SIMDE_ALIGN_MAXIMUM) - #if defined(HEDLEY_MSVC_VERSION) - #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) - // Visual studio 2017 and newer does not need a max - #else - #if defined(_M_IX86) || defined(_M_AMD64) - #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 - #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) - /* VS 2010 is really a guess based on Wikipedia; if anyone can - * test with old VS versions I'd really appreciate it. */ - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 - #else - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif - #elif defined(_M_ARM) || defined(_M_ARM64) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 - #endif - #endif - #elif defined(HEDLEY_IBM_VERSION) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif -#endif - -/* You can mostly ignore these; they're intended for internal use. - * If you do need to use them please let me know; if they fulfill - * a common use case I'll probably drop the trailing underscore - * and make them part of the public API. */ -#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) - #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 - #define SIMDE_ALIGN_64_ 32 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 - #define SIMDE_ALIGN_64_ 16 - #define SIMDE_ALIGN_32_ 16 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 - #define SIMDE_ALIGN_64_ 8 - #define SIMDE_ALIGN_32_ 8 - #define SIMDE_ALIGN_16_ 8 - #define SIMDE_ALIGN_8_ 8 - #else - #error Max alignment expected to be >= 8 - #endif -#else - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 -#endif - -/** - * SIMDE_ALIGN_CAP(Alignment) - * - * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. - */ -#if defined(SIMDE_ALIGN_MAXIMUM) - #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) -#else - #define SIMDE_ALIGN_CAP(Alignment) (Alignment) -#endif - -/* SIMDE_ALIGN_TO(Alignment) - * - * SIMDE_ALIGN_TO is used to declare types or variables. It basically - * maps to the align attribute in most compilers, the align declspec - * in MSVC, or _Alignas/alignas in C11/C++11. - * - * Example: - * - * struct i32x4 { - * SIMDE_ALIGN_TO(16) int32_t values[4]; - * } - * - * Limitations: - * - * MSVC requires that the Alignment parameter be numeric; you can't do - * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is - * unfortunate because that's really how the LIKE macros are - * implemented, and I am not aware of a way to get anything like this - * to work without using the C11/C++11 keywords. 
- * - * It also means that we can't use SIMDE_ALIGN_CAP to limit the - * alignment to the value specified, which MSVC also requires, so on - * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. - * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, - * but should be safe to use on MSVC. - * - * All this is to say that, if you want your code to work on MSVC, you - * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of - * SIMDE_ALIGN_TO(8/16/32/64). - */ -#if \ - HEDLEY_HAS_ATTRIBUTE(aligned) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) -#elif \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) - #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) - #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) - /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); - * the alignment passed to the declspec has to be an integer. */ - #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE -#endif -#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) -#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) -#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) -#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) - -/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) - * - * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's - * std::assume_aligned, or __builtin_assume_aligned. It tells the - * compiler to assume that the provided pointer is aligned to an - * `Alignment`-byte boundary. - * - * If you define SIMDE_ALIGN_DEBUG prior to including this header then - * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
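A short usage sketch for the SIMDE_ALIGN_ASSUME_TO macro just described; the function, its parameters, and its alignment contract are invented for illustration, and only the macro defined above is assumed.

#include <cstddef>

// The caller promises `data` is 32-byte aligned; the hint lets the compiler
// emit aligned loads/stores for the loop (or, with SIMDE_ALIGN_DEBUG defined,
// becomes a runtime check as described above).
static void scale_in_place(float* data, std::size_t n, float s) {
    float* p = SIMDE_ALIGN_ASSUME_TO(data, 32);
    for (std::size_t i = 0; i < n; i++) {
        p[i] *= s;
    }
}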
We don't - * integrate with NDEBUG in this header, but it may be a good idea to - * put something like this in your code: - * - * #if !defined(NDEBUG) - * #define SIMDE_ALIGN_DEBUG - * #endif - * #include <.../simde-align.h> - */ -#if \ - HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ - HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ - __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ - __assume_aligned(simde_assume_aligned_t_, Alignment); \ - simde_assume_aligned_t_; \ - })) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) -#else - #if defined(__cplusplus) - template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) - #else - HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) - #endif - { - HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); - return ptr; - } - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) - #else - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) - #endif -#endif - -#if !defined(SIMDE_ALIGN_DEBUG) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) -#else - #include - #if defined(__cplusplus) - template - static HEDLEY_ALWAYS_INLINE - T* - simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #else - static HEDLEY_ALWAYS_INLINE - void* - simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #endif - { - if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { - fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", - file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), - HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), - HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); - } - - return ptr; - } - - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) - #else - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) - #endif -#endif - -/* SIMDE_ALIGN_LIKE(Type) - * SIMDE_ALIGN_LIKE_#(Type) - * - * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros - * except instead of an integer they take a type; basically, it's just - * a more convenient way to do something like: - * - * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - * - * The versions with a numeric suffix will fall back 
on using a numeric - * value in the event we can't use SIMDE_ALIGN_OF(Type). This is - * mainly for MSVC, where __declspec(align()) can't handle anything - * other than hard-coded numeric values. - */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) - #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) -#else - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 -#endif - -/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) - * - * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a - * type instead of a numeric value. */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) - #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) -#endif - -/* SIMDE_ALIGN_CAST(Type, Pointer) - * - * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try - * to silence warnings that some compilers may produce if you try - * to assign to a type with increased alignment requirements. - * - * Note that it does *not* actually attempt to tell the compiler that - * the pointer is aligned like the destination should be; that's the - * job of the next macro. This macro is necessary for stupid APIs - * like _mm_loadu_si128 where the input is a __m128i* but the function - * is specifically for data which isn't necessarily aligned to - * _Alignof(__m128i). - */ -#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ - Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_r_; \ - })) -#else - #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) -#endif - -/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) - * - * This is sort of like a combination of a reinterpret_cast and a - * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell - * the compiler that the pointer is aligned like the specified type - * and casts the pointer to the specified type while suppressing any - * warnings from the compiler about casting to a type with greater - * alignment requirements. - */ -#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) - -#endif /* !defined(SIMDE_ALIGN_H) */ -/* :: End simde/simde-align.h :: */ - -/* In some situations, SIMDe has to make large performance sacrifices - * for small increases in how faithfully it reproduces an API, but - * only a relatively small number of users will actually need the API - * to be completely accurate. The SIMDE_FAST_* options can be used to - * disable these trade-offs. - * - * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or - * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to - * enable some optimizations. Using -ffast-math and/or - * -ffinite-math-only will also enable the relevant options. If you - * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
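A small consumer-side sketch of the opt-in/opt-out mechanism just described; only the macro names mentioned above (SIMDE_FAST_MATH, SIMDE_FAST_NANS, SIMDE_NO_FAST_NANS) are assumed, and the build lines are illustrative.

#include <cstdio>

// Report which trade-offs a particular build ended up enabling.
static void report_simde_fast_flags(void) {
#if defined(SIMDE_FAST_MATH)
    std::puts("SIMDE_FAST_MATH: speed preferred over exact API reproduction");
#endif
#if defined(SIMDE_FAST_NANS)
    std::puts("SIMDE_FAST_NANS: NaN corner cases follow the native platform");
#else
    std::puts("NaN corner cases are emulated to match the target API");
#endif
}
// Typical invocations (illustrative):
//   g++ -O3 -DSIMDE_FAST_MATH ...                     opt in to every trade-off
//   g++ -O3 -ffast-math -DSIMDE_NO_FAST_NANS ...      fast math, but keep NaN fidelity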
*/ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) - #define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) - #if defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_NANS - #elif defined(__FINITE_MATH_ONLY__) - #if __FINITE_MATH_ONLY__ - #define SIMDE_FAST_NANS - #endif - #endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_CONVERSION_RANGE -#endif - -/* Due to differences across platforms, sometimes it can be much - * faster for us to allow spurious floating point exceptions, - * or to no generate them when we should. 
*/ -#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_EXCEPTIONS -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) - #if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ - (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) - #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") - #else - #define SIMDE_REQUIRE_CONSTANT(arg) - #endif -#else - #define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) - /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which - * starts with a double-underscore. This is a system header so we have no - * control over it, but since it's a macro it will emit a diagnostic which - * prevents compilation with -Werror. */ - #if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ - _Static_assert(expr, message); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) - #endif -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) - #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#endif - -/* Statement exprs */ -#if \ - HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ - HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) -#endif - -/* This is just a convenience macro to make it easy to call a single - * function with a specific diagnostic disabled. 
*/ -#if defined(SIMDE_STATEMENT_EXPR_) - #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ - SIMDE_STATEMENT_EXPR_(({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - diagnostic \ - (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#endif - -#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) - #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") -#endif - -#if \ - (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) -# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -# define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -# if \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SCALAR -# define SIMDE_VECTOR_SUBSCRIPT -# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -# define SIMDE_VECTOR_SUBSCRIPT -# elif \ - HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# elif HEDLEY_HAS_ATTRIBUTE(vector_size) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SUBSCRIPT -# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) -# define SIMDE_VECTOR_SCALAR -# endif -# endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) - HEDLEY_DIAGNOSTIC_PUSH - /* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -# endif -# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif - -# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#else -# define SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_SAFELEN(l) -# define SIMDE_VECTORIZE_REDUCTION(r) -# define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if defined(SIMDE_NO_INLINE) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#elif defined(SIMDE_CONSTRAINED_COMPILATION) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static -#else -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if \ - HEDLEY_HAS_ATTRIBUTE(unused) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ - -#if defined(_MSC_VER) -# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -# define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -# define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -# define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -# define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# elif defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__s390x__) || defined(__zarch__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. 
*/ -# elif defined(_WIN32) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(sun) || defined(__sun) /* Solaris */ -# include -# if defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__APPLE__) -# include -# if defined(__LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -# include -# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -# include -# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# endif -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) - #define simde_bswap64(v) _byteswap_uint64(v) -#else - SIMDE_FUNCTION_ATTRIBUTES - uint64_t - simde_bswap64(uint64_t v) { - return - ((v & (((uint64_t) 0xff) << 56)) >> 56) | - ((v & (((uint64_t) 0xff) << 48)) >> 40) | - ((v & (((uint64_t) 0xff) << 40)) >> 24) | - ((v & (((uint64_t) 0xff) << 32)) >> 8) | - ((v & (((uint64_t) 0xff) << 24)) << 8) | - ((v & (((uint64_t) 0xff) << 16)) << 24) | - ((v & (((uint64_t) 0xff) << 8)) << 40) | - ((v & (((uint64_t) 0xff) )) << 56); - } -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -# error Unknown byte order; please file a bug -#else -# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -# define simde_endian_bswap64_be(value) simde_bswap64(value) -# define simde_endian_bswap64_le(value) (value) -# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -# define simde_endian_bswap64_be(value) (value) -# define simde_endian_bswap64_le(value) simde_bswap64(value) -# endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. 
*/ - -#if !defined(SIMDE_FLOAT32_TYPE) -# define SIMDE_FLOAT32_TYPE float -# define SIMDE_FLOAT32_C(value) value##f -#else -# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -# define SIMDE_FLOAT64_TYPE double -# define SIMDE_FLOAT64_C(value) value -#else -# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if defined(SIMDE_POLY8_TYPE) -# undef SIMDE_POLY8_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY8_TYPE poly8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value)) -#else -# define SIMDE_POLY8_TYPE uint8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value)) -#endif -typedef SIMDE_POLY8_TYPE simde_poly8; - -#if defined(SIMDE_POLY16_TYPE) -# undef SIMDE_POLY16_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY16_TYPE poly16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value)) -#else -# define SIMDE_POLY16_TYPE uint16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value)) -#endif -typedef SIMDE_POLY16_TYPE simde_poly16; - -#if defined(SIMDE_POLY64_TYPE) -# undef SIMDE_POLY64_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_POLY64_TYPE poly64_t -# define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull)) -#else -# define SIMDE_POLY64_TYPE uint64_t -# define SIMDE_POLY64_C(value) value ## ull -#endif -typedef SIMDE_POLY64_TYPE simde_poly64; - -#if defined(SIMDE_POLY128_TYPE) -# undef SIMDE_POLY128_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) -# define SIMDE_POLY128_TYPE poly128_t -# define SIMDE_POLY128_C(value) value -#elif defined(__SIZEOF_INT128__) -# define SIMDE_POLY128_TYPE __int128 -# define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value)) -#else -# define SIMDE_POLY128_TYPE uint64_t -# define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1 -#endif -typedef SIMDE_POLY128_TYPE simde_poly128; - -#if defined(__cplusplus) - typedef bool simde_bool; -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - typedef _Bool simde_bool; -#elif defined(bool) - typedef bool simde_bool; -#else - #include - typedef bool simde_bool; -#endif - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -# define SIMDE_CONVERT_FTOI(T,v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) \ - HEDLEY_DIAGNOSTIC_POP -#else -# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) -#else - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -# define 
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -# define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -# define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -# if \ - defined(HEDLEY_PGI_VERSION) || \ - defined(HEDLEY_MSVC_VERSION) -# define SIMDE_STDC_HOSTED 1 -# else -# define SIMDE_STDC_HOSTED 0 -# endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) - #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) - #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) - #endif -#endif -#if !defined(simde_memset) - #if HEDLEY_HAS_BUILTIN(__builtin_memset) - #define simde_memset(s, c, n) __builtin_memset(s, c, n) - #endif -#endif -#if !defined(simde_memcmp) - #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) - #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) - #endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) - #if !defined(SIMDE_NO_STRING_H) - #if defined(__has_include) - #if !__has_include() - #define SIMDE_NO_STRING_H - #endif - #elif (SIMDE_STDC_HOSTED == 0) - #define SIMDE_NO_STRING_H - #endif - #endif - - #if !defined(SIMDE_NO_STRING_H) - #include - #if !defined(simde_memcpy) - #define simde_memcpy(dest, src, n) memcpy(dest, src, n) - #endif - #if !defined(simde_memset) - #define simde_memset(s, c, n) memset(s, c, n) - #endif - #if !defined(simde_memcmp) - #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) - #endif - #else - /* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. 
*/ - #if !defined(simde_memcpy) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memcpy_(void* dest, const void* src, size_t len) { - char* dest_ = HEDLEY_STATIC_CAST(char*, dest); - const char* src_ = HEDLEY_STATIC_CAST(const char*, src); - for (size_t i = 0 ; i < len ; i++) { - dest_[i] = src_[i]; - } - } - #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) - #endif - - #if !defined(simde_memset) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memset_(void* s, int c, size_t len) { - char* s_ = HEDLEY_STATIC_CAST(char*, s); - char c_ = HEDLEY_STATIC_CAST(char, c); - for (size_t i = 0 ; i < len ; i++) { - s_[i] = c_; - } - } - #define simde_memset(s, c, n) simde_memset_(s, c, n) - #endif - - #if !defined(simde_memcmp) - SIMDE_FUNCTION_ATTRIBUTES - int - simde_memcmp_(const void *s1, const void *s2, size_t n) { - const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1); - const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2); - for (size_t i = 0 ; i < n ; i++) { - if (s1_[i] != s2_[i]) { - return (int) (s1_[i] - s2_[i]); - } - } - return 0; - } - #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) - #endif - #endif -#endif - -/*** Functions that quiet a signaling NaN ***/ - -static HEDLEY_INLINE -double -simde_math_quiet(double x) { - uint64_t tmp, mask; - if (!simde_math_isnan(x)) { - return x; - } - simde_memcpy(&tmp, &x, 8); - mask = 0x7ff80000; - mask <<= 32; - tmp |= mask; - simde_memcpy(&x, &tmp, 8); - return x; -} - -static HEDLEY_INLINE -float -simde_math_quietf(float x) { - uint32_t tmp; - if (!simde_math_isnanf(x)) { - return x; - } - simde_memcpy(&tmp, &x, 4); - tmp |= 0x7fc00000lu; - simde_memcpy(&x, &tmp, 4); - return x; -} - -#if defined(FE_ALL_EXCEPT) - #define SIMDE_HAVE_FENV_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_FENV_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_FENV_H -#endif - -#if defined(EXIT_FAILURE) - #define SIMDE_HAVE_STDLIB_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_STDLIB_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_STDLIB_H -#endif - -#if defined(__has_include) -# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -# include -# elif __has_include() -# include -# endif -# if __has_include() -# include -# endif -#elif SIMDE_STDC_HOSTED == 1 -# include -# include -#endif - -#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ - static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ - T_To \ - Name (T_From value) { \ - T_To r; \ - simde_memcpy(&r, &value, sizeof(r)); \ - return r; \ - } - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/check.h :: */ -/* Check (assertions) - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code.
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -#if !defined(_WIN32) -# define SIMDE_SIZE_MODIFIER "z" -# define SIMDE_CHAR_MODIFIER "hh" -# define SIMDE_SHORT_MODIFIER "h" -#else -# if defined(_M_X64) || defined(__amd64__) -# define SIMDE_SIZE_MODIFIER "I64" -# else -# define SIMDE_SIZE_MODIFIER "" -# endif -# define SIMDE_CHAR_MODIFIER "" -# define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) -# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ -# define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -# if defined(__has_include) -# if __has_include() -# include -# endif -# elif defined(SIMDE_STDC_HOSTED) -# if SIMDE_STDC_HOSTED == 1 -# include -# endif -# elif defined(__STDC_HOSTED__) -# if __STDC_HOSTETD__ == 1 -# include -# endif -# endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/debug-trap.h :: */ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define simde_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define simde_trap() __debugbreak() -# endif -#endif -#if !defined(simde_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define simde_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define simde_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define simde_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void simde_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define simde_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define simde_trap() raise(SIGTRAP) -# else -# define simde_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -# define simde_dbg_assert(expr) do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -# define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ -/* :: End simde/debug-trap.h :: */ - - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -# if defined(EOF) -# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) -# else -# define simde_errorf(format, ...) (simde_trap()) -# endif - HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -# if defined(SIMDE_CHECK_FAIL_DEFINED) -# define simde_assert(expr) -# else -# if defined(HEDLEY_ASSUME) -# define simde_assert(expr) HEDLEY_ASSUME(expr) -# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) -# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) -# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) -# define simde_assert(expr) __assume(expr) -# else -# define simde_assert(expr) -# endif -# endif -# define simde_assert_true(expr) simde_assert(expr) -# define simde_assert_false(expr) simde_assert(!(expr)) -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) -# define simde_assert_double_equal(a, b, precision) -# define simde_assert_string_equal(a, b) -# define simde_assert_string_not_equal(a, b) -# define simde_assert_memory_equal(size, a, b) -# define simde_assert_memory_not_equal(size, a, b) -#else -# define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ - -(simde_tmp_a_ - simde_tmp_b_) : \ - (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# include -# define simde_assert_string_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ - simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_string_not_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ - simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ - simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) \ - simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) \ - simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) \ - simde_assert_type(float, "f", a, op, b) 
-#define simde_assert_double(a, op, b) \ - simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void*, "p", a, op, b) - -#define simde_assert_int8(a, op, b) \ - simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) \ - simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) \ - simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) \ - simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ -/* :: End simde/check.h :: */ - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether the operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * we use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long lonsg are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long. 
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. 
- * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - #include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; - #endif - - #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; - #endif - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; - #endif - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; - #endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde__m64_from_private(simde__m64_private v) { - simde__m64 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64_private -simde__m64_to_private(simde__m64 v) { - simde__m64_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ - SIMDE_FUNCTION_ATTRIBUTES \ - simde__##simde_type \ - simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ - simde__##simde_type##_private r_; \ - r_.isax##_##fragment = value; \ - return simde__##simde_type##_from_private(r_); \ - } \ - \ - SIMDE_FUNCTION_ATTRIBUTES \ - source_type \ - simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ - simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ - return r_.isax##_##fragment; \ - } - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) -#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) -# define _m_paddb(a, b) simde_m_paddb(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_add_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) -# define _m_paddw(a, b) simde_mm_add_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) -# define _m_paddd(a, b) simde_mm_add_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { - r_.i8[i] = INT8_MAX; - } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { - r_.i8[i] = INT8_MIN; - } else { - r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) -# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, 
b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) -# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_and_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - r_.i64[0] = a_.i64[0] & b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -# define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = 
[... remainder of the deleted vendored SIMDe header omitted: the rest of the MMX compatibility shims (compare, convert, multiply-add, pack, set/setr/setzero, load/store, shift, subtract, unpack, and xor helpers through the end of simde/x86/mmx.h), followed by the beginning of simde/simde-f16.h (MIT license text, SIMDE_FLOAT16_API selection logic, and the half-precision/single-precision conversion and classification helpers). This is unmodified third-party code from the SIMDe project that is removed in its entirety by this change. ...]
denormal = exponent_all_zeros & (!mantissa_all_zeros); - uint8_t finite_negative = negative & (!exponent_all_ones) & (!zero); - result = (((imm8 >> 0) & qnan) | \ - ((imm8 >> 1) & positive_zero) | \ - ((imm8 >> 2) & negative_zero) | \ - ((imm8 >> 3) & positive_infinity) | \ - ((imm8 >> 4) & negative_infinity) | \ - ((imm8 >> 5) & denormal) | \ - ((imm8 >> 6) & finite_negative) | \ - ((imm8 >> 7) & snan)); - return result; -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_FLOAT16_H) */ -/* :: End simde/simde-f16.h :: */ - -#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) - #define NOMINMAX - #include -#endif - -#if defined(__ARM_ACLE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_ALIGN_TO_16 __m128 n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v16i8 lsx_i8; - v8i16 lsx_i16; - v4i32 lsx_i32; - v2i64 lsx_i64; - v16u8 lsx_u8; - v8u16 lsx_u16; - v4u32 lsx_u32; - v2u64 lsx_u64; - v4f32 lsx_f32; - v2f64 lsx_f64; - #endif -} simde__m128_private; - -#if defined(SIMDE_X86_SSE_NATIVE) - typedef __m128 simde__m128; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef float32x4_t simde__m128; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; -#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - typedef v4f32 simde__m128; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128_private simde__m128; -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - typedef simde__m128 __m128; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde__m128_from_private(simde__m128_private v) { - simde__m128 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128_private -simde__m128_to_private(simde__m128 v) { - simde__m128_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(float) - simde__m128_to_altivec_f32(simde__m128 value) { - simde__m128_private r_ = simde__m128_to_private(value); - return r_.altivec_f32; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { - simde__m128_private r_; - r_.altivec_f32 = value; - return simde__m128_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); -#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ - -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) -#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ - -enum { - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, - SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, - SIMDE_MM_ROUND_UP = _MM_ROUND_UP, - SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO - #else - SIMDE_MM_ROUND_NEAREST = 0x0000, - SIMDE_MM_ROUND_DOWN = 0x2000, - SIMDE_MM_ROUND_UP = 0x4000, - SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 - #endif -}; -#if defined(_MM_ROUND_MASK) -# define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK -#else -# define SIMDE_MM_ROUND_MASK (0x6000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK -#endif - -#if defined(_MM_FROUND_TO_NEAREST_INT) -# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT -# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF -# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF -# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO -# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION - -# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC -# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC -#else -# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 -# define 
SIMDE_MM_FROUND_TO_NEG_INF 0x01 -# define SIMDE_MM_FROUND_TO_POS_INF 0x02 -# define SIMDE_MM_FROUND_TO_ZERO 0x03 -# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 - -# define SIMDE_MM_FROUND_RAISE_EXC 0x00 -# define SIMDE_MM_FROUND_NO_EXC 0x08 -#endif - -#define SIMDE_MM_FROUND_NINT \ - (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_FLOOR \ - (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_CEIL \ - (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_TRUNC \ - (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_RINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_NEARBYINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) - -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) -# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT -# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF -# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF -# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO -# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION -# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC -# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT -# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR -# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL -# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC -# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT -# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT -#endif - -#if defined(_MM_EXCEPT_INVALID) -# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID -#else -# define SIMDE_MM_EXCEPT_INVALID (0x0001) -#endif -#if defined(_MM_EXCEPT_DENORM) -# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM -#else -# define SIMDE_MM_EXCEPT_DENORM (0x0002) -#endif -#if defined(_MM_EXCEPT_DIV_ZERO) -# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO -#else -# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) -#endif -#if defined(_MM_EXCEPT_OVERFLOW) -# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW -#else -# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) -#endif -#if defined(_MM_EXCEPT_UNDERFLOW) -# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW -#else -# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) -#endif -#if defined(_MM_EXCEPT_INEXACT) -# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT -#else -# define SIMDE_MM_EXCEPT_INEXACT (0x0020) -#endif -#if defined(_MM_EXCEPT_MASK) -# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK -#else -# define SIMDE_MM_EXCEPT_MASK \ - (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ - SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ - SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID - #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM - #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO - #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW - #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW - #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT - #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK -#endif - -#if defined(_MM_MASK_INVALID) -# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID -#else -# define SIMDE_MM_MASK_INVALID (0x0080) -#endif -#if defined(_MM_MASK_DENORM) -# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM -#else -# define SIMDE_MM_MASK_DENORM (0x0100) -#endif -#if defined(_MM_MASK_DIV_ZERO) -# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO -#else -# define 
SIMDE_MM_MASK_DIV_ZERO (0x0200) -#endif -#if defined(_MM_MASK_OVERFLOW) -# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW -#else -# define SIMDE_MM_MASK_OVERFLOW (0x0400) -#endif -#if defined(_MM_MASK_UNDERFLOW) -# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW -#else -# define SIMDE_MM_MASK_UNDERFLOW (0x0800) -#endif -#if defined(_MM_MASK_INEXACT) -# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT -#else -# define SIMDE_MM_MASK_INEXACT (0x1000) -#endif -#if defined(_MM_MASK_MASK) -# define SIMDE_MM_MASK_MASK _MM_MASK_MASK -#else -# define SIMDE_MM_MASK_MASK \ - (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ - SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ - SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID - #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM - #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO - #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW - #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW - #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT - #define _MM_MASK_MASK SIMDE_MM_MASK_MASK -#endif - -#if defined(_MM_FLUSH_ZERO_MASK) -# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK -#else -# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_ON) -# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON -#else -# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_OFF) -# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF -#else -# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK - #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON - #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_ROUNDING_MODE(void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _MM_GET_ROUNDING_MODE(); - #elif defined(SIMDE_HAVE_FENV_H) - unsigned int vfe_mode; - - switch (fegetround()) { - #if defined(FE_TONEAREST) - case FE_TONEAREST: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case FE_TOWARDZERO: - vfe_mode = SIMDE_MM_ROUND_DOWN; - break; - #endif - - #if defined(FE_UPWARD) - case FE_UPWARD: - vfe_mode = SIMDE_MM_ROUND_UP; - break; - #endif - - #if defined(FE_DOWNWARD) - case FE_DOWNWARD: - vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; - break; - #endif - - default: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - } - - return vfe_mode; - #else - return SIMDE_MM_ROUND_NEAREST; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_ROUNDING_MODE(a); - #elif defined(SIMDE_HAVE_FENV_H) - int fe_mode = FE_TONEAREST; - - switch (a) { - #if defined(FE_TONEAREST) - case SIMDE_MM_ROUND_NEAREST: - fe_mode = FE_TONEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case SIMDE_MM_ROUND_TOWARD_ZERO: - fe_mode = FE_TOWARDZERO; - break; - #endif - - #if defined(FE_DOWNWARD) - case SIMDE_MM_ROUND_DOWN: - fe_mode = FE_DOWNWARD; - break; - #endif - - #if defined(FE_UPWARD) - case SIMDE_MM_ROUND_UP: - fe_mode = FE_UPWARD; - break; - #endif - - default: - return; - } - - fesetround(fe_mode); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. 
*/ - #if \ - defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_f32 = vrndiq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndnq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndmq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); - #elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndpq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); - #elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndq_f32(a_.neon_f32); 
- #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); - #elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) -#else - #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps(e3, e2, e1, e0); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; - r_.neon_f32 = vld1q_f32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps1 (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps1(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - (void) a; - return vec_splats(a); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - return (simde__m128)__lsx_vldrepl_w(&a, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_splat(a); - #else - return simde_mm_set_ps(a, a, a, a); - #endif -} -#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps1(a) simde_mm_set_ps1(a) -# define _mm_set1_ps(a) simde_mm_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_move_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_move_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; - r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); - #else - r_.f32[0] = b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_broadcastlow_ps(simde__m128 a) { - /* This function broadcasts the first element in the inpu vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_ss functions since there may be garbage in the upper lanes. */ - - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_shuffle_ps(a, a, 0); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - // the upper values in the result must be the remnants of . 
- r_.neon_f32 = vaddq_f32(a_.neon_f32, value); - #else - r_.f32[0] = a_.f32[0] + b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_and_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_and_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_andnot_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_xor_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_xor_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_or_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_or_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_not_ps(simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* Note: we use ints instead of floats because we don't want cmpeq - * to return false for (NaN, NaN) */ - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - mask_ = simde__m128_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint32_t wa SIMDE_VECTOR(16); - uint32_t wb SIMDE_VECTOR(16); - uint32_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) -# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint16_t wa SIMDE_VECTOR(16); - uint16_t wb SIMDE_VECTOR(16); - uint16_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) -# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_abs_ps(simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - simde_float32 mask_; - uint32_t u32_ = UINT32_C(0x7FFFFFFF); - simde_memcpy(&mask_, &u32_, sizeof(u32_)); - return _mm_and_ps(_mm_set1_ps(mask_), a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vabsq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_abs(a_.altivec_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpge_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpge_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpgt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
[... deletion of the vendored SIMDe SSE compatibility header continues; elided here: the remaining removed-line blocks covering the simde_mm_cmp*_ps/_ss and simde_mm_comi*_ss comparisons, the simde_mm_cvt*/cvtt* conversions between simde__m64/simde__m128 and integer types, the simde_mm_load*/loadh_pi/loadl_pi/loadr_ps/loadu_ps loads, simde_mm_maskmove_si64, the simde_mm_max_*/min_* (pi16, pu8, ps, ss) functions, simde_mm_movehl_ps/movelh_ps, simde_mm_movemask_pi8/_ps, and simde_mm_mul_ps/_ss, each deleted verbatim together with its _mm_* native-alias #defines ...]
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] == b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] == b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] >= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] >= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] > b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] > b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] <= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] <= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] < b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] < b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] != b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] != b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) -# if defined(__has_builtin) -# if __has_builtin(__builtin_ia32_undef128) -# define SIMDE_HAVE_UNDEFINED128 -# endif -# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) -# define SIMDE_HAVE_UNDEFINED128 -# endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpackhi_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_high_f32(a_.neon_f32); - float32x2_t b1 = vget_high_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = 
__lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpacklo_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_low_f32(a_.neon_f32); - float32x2_t b1 = vget_low_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) || \ - defined(SIMDE_VECTOR_SUBSCRIPT)) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private a_ = simde__m64_to_private(a); - vst1_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), a_.neon_i64); - #else - simde__m64_private* - dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), - a_ = simde__m64_to_private(a); - - dest->i64[0] = a_.i64[0]; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || defined(SIMDE_LOONGARCH_LSX_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_ASSUME_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_ps(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_ps(mem_addr, a) 
simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ - row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - } while (0) -#else - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ - SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ - row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ - row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ - row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ - row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ - } while (0) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE_H) */ -/* :: End simde/x86/sse.h :: */ -#if !defined(SIMDE_X86_AVX_H) -#define SIMDE_X86_AVX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
[Deleted vendored third-party code: the SIMDe x86 headers, an auto-generated, MIT-licensed amalgamation (commit 589c7d599ae2213823acc4334a3ae8ef8caefe18) covering SSE4.2, SSE4.1, SSSE3, SSE3, and SSE2. These headers defined the portable simde__m128i / simde__m128d types and the simde_mm_* wrappers for the x86 SSE intrinsics, each with native x86, ARM NEON, POWER AltiVec, WASM SIMD128, and plain scalar fallback paths. They are removed here together with the rest of the x86 intrinsics dependency.]
~UINT64_C(0) : 0; - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpgt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpge_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpge_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpngt_pd(a, b); - #else - return simde_mm_cmple_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpngt_sd(a, b); - #else - return simde_mm_cmple_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnge_pd(a, b); - #else - return simde_mm_cmplt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpnge_sd(a, b); - #else - return simde_mm_cmplt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_pd(a, b); - #else - return simde_mm_cmpge_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_sd(a, b); - #else - return simde_mm_cmpge_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnle_pd(a, b); - #else - return simde_mm_cmpgt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return 
_mm_cmpnle_sd(a, b); - #else - return simde_mm_cmpgt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vandq_u64(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm_cvtsd_f64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cvtsd_f64(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); - #else - return a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_pd(a); - #else - simde__m128d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtepi32_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_ps(a); - #else - simde__m128_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #pragma clang diagnostic ignored "-Wc11-extensions" - #endif - r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = simde_math_round(a_.f64[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) - return _mm_cvtpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvtpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) - float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; - r_.f32 = - __builtin_shufflevector( - __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, - 0, 1, 2, 3 - ); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); - r_.f32[2] = SIMDE_FLOAT32_C(0.0); - r_.f32[3] = SIMDE_FLOAT32_C(0.0); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtpi32_pd (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_pd(a); - #else - simde__m128d_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) - a_ = simde__m128_to_private(a); - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - #else - a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_pd(a); - #else - simde__m128d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 v = simde_math_round(a_.f64[0]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsd_si64x(a); - #else - return _mm_cvtsd_si64(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); - #endif -} -#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) - #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); - - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i]; - } - #endif - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_x_mm_cvtsi128_si16 (simde__m128i a) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s16(a_.neon_i16, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i16, 0); - #else - return a_.i16[0]; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi128_si32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi128_si32(a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s32(a_.neon_i32, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i32, 0); - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsi128_si64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsi128_si64x(a); - #else - return _mm_cvtsi128_si64(a); - #endif - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); - #endif - return a_.i64[0]; - #endif -} -#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) - #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_sd(a, b); - #else - simde__m128d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.i64[1] = a_.i64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cvtsi16_si128 (int16_t a) { - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); - #else - r_.i16[0] = a; - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - r_.i16[4] = 0; - r_.i16[5] = 0; - r_.i16[6] = 0; - r_.i16[7] = 0; - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi32_si128 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_si128(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_sd(a, b); - #else - return _mm_cvtsi64x_sd(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) - #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi64_si128 (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_si128(a); - #else - return _mm_cvtsi64x_si128(a); - #endif - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i64x2_make(a, 0); - #else - r_.i64[0] = a; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) - #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtss_sd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); - return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); - - return simde__m128d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvttpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvttpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = a_.f64[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvttpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - /* Values below INT32_MIN saturate anyways, so we don't need to - * test for that. 
*/ - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = - vandq_u32( - vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), - vceqq_f32(a_.neon_f32, a_.neon_f32) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); - #endif - - r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - v128_t valid_input = - wasm_v128_and( - wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), - wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); - #endif - - r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); - #endif - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; - - __typeof__(r_.i32) valid_input = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.i32), - (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) - ); - #elif !defined(SIMDE_FAST_NANS) - __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); - #endif - - __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; - r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); - #endif - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvttsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvttsd_si64(a); - #else - return _mm_cvttsd_si64x(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); - #endif -} -#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) - #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] / b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - uint16_t r; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); - #else - r = a_.u16[imm8 & 7]; - #endif - - return HEDLEY_STATIC_CAST(int32_t, r); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) - #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m128i_private a_ = simde__m128i_to_private(a); - a_.i16[imm8 & 7] = i; - return simde__m128i_from_private(a_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load1_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load1_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); - #else - return simde_mm_set1_pd(*mem_addr); - #endif -} -#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) - #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_sd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_sd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = UINT64_C(0); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_load_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadh_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); - #else - simde_float64 t; - - simde_memcpy(&t, mem_addr, sizeof(t)); - r_.f64[0] = a_.f64[0]; - r_.f64[1] = t; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_epi64(mem_addr); - #else - simde__m128i_private r_; - - int64_t value; - simde_memcpy(&value, mem_addr, sizeof(value)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); - #else - r_.i64[0] = value; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vld1_f64( - HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = a_.u64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadr_pd(mem_addr); - #else - simde__m128d_private - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); - r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t tmp = 
wasm_v128_load(mem_addr); - r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); - #else - r_.f64[0] = mem_addr[1]; - r_.f64[1] = mem_addr[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld1q_f64(mem_addr); - #else - simde__m128d_private r_; - - simde_memcpy(&r_, mem_addr, sizeof(r_)); - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi8 - #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi16 - #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi32(void const * mem_addr) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi32 - #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi64 - #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si128 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); - #else - simde__m128i_private r_; - - #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_PACKED_ - struct simde_mm_loadu_si128_s { - __typeof__(r_) v; - } __attribute__((__packed__, __may_alias__)); - r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_madd_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpaddq_s32(pl, ph); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); - int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); - int32x2_t rh = 
vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); - r_.neon_i32 = vcombine_s32(rl, rh); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) a32, b32, p32; - SIMDE_CONVERT_VECTOR_(a32, a_.i16); - SIMDE_CONVERT_VECTOR_(b32, b_.i16); - p32 = a32 * b32; - r_.i32 = - __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + - __builtin_shufflevector(p32, p32, 1, 3, 5, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - if (mask_.u8[i] & 0x80) { - mem_addr[i] = a_.i8[i]; - } - } - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) - /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ - return _mm_movemask_epi8(a); - #else - int32_t r = 0; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ - static const uint8_t md[16] = { - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - }; - - /* Extend sign bit over entire lane */ - uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); - /* Clear all but the bit we're interested in. 
*/ - uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); - /* Alternate bytes from low half and high half */ - uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); - uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vaddvq_u16(x); - #else - uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[15 - i] >> 7) << (15 - i); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_movemask_pd(a); - #else - int32_t r = 0; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + - (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 
-simde_mm_movepi64_pi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movepi64_pi64(a); - #else - simde__m64_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i64 = vget_low_s64(a_.neon_i64); - #else - r_.i64[0] = a_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_movpi64_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movpi64_epi64(a); - #else - simde__m128i_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_move_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_move_epi64(a) simde_mm_move_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t a_lo = vmovn_u64(a_.neon_u64); - uint32x2_t b_lo = vmovn_u64(b_.neon_u64); - r_.neon_u64 = vmull_u32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( - wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), - wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(a_.u32) z = { 0, }; - a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); - b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * - HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 * b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] * b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i64 = a_.i64 % b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_pd(a, b); - #else - simde__m128d_private - r_, 
- a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] * b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_mul_su32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); - #else - r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mulhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a3210 = vget_low_s16(a_.neon_i16); - int16x4_t b3210 = vget_low_s16(b_.neon_i16); - int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); - #else - int16x4_t a7654 = vget_high_s16(a_.neon_i16); - int16x4_t b7654 = vget_high_s16(b_.neon_i16); - int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); - r_.neon_u16 = rv.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); - 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_mulhi_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t a3210 = vget_low_u16(a_.neon_u16); - uint16x4_t b3210 = vget_low_u16(b_.neon_u16); - uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); - r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - #else - uint16x4_t a7654 = vget_high_u16(a_.neon_u16); - uint16x4_t b7654 = vget_high_u16(b_.neon_u16); - uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - r_.neon_u16 = neon_r.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); - r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mullo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_or_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - 
r_.i32f = a_.i32f | b_.i32f; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_or_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi16(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; - const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; - - int16_t m SIMDE_VECTOR(32); - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); - v = (v & ~m) | (min & m); - - m = v > max; - v = (v & ~m) | (max & m); - - SIMDE_CONVERT_VECTOR_(r_.i8, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; - r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi32(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; - const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; - - int32_t m SIMDE_VECTOR(32); - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); - v = (v & ~m) | (min & m); - - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); - v = (v & ~m) | (max & m); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packus_epi16(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); - #else - r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = - vcombine_u8( - vqmovun_s16(a_.neon_i16), - vqmovun_s16(b_.neon_i16) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - - v &= ~(v >> 15); - v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); - - SIMDE_CONVERT_VECTOR_(r_.i8, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; - r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_pause (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_pause(); - #elif defined(SIMDE_ARCH_X86) - __asm__ __volatile__("pause"); - #elif defined(SIMDE_ARCH_ARM_NEON) - #if defined(_MSC_VER) - __isb(_ARM64_BARRIER_SY); - #else - __asm__ __volatile__("isb\n"); - #endif - #elif defined(SIMDE_ARCH_POWER) - __asm__ __volatile__ ("or 27,27,27" ::: "memory"); - #elif defined(SIMDE_ARCH_WASM) - __asm__ __volatile__ ("nop"); - #elif defined(HEDLEY_GCC_VERSION) - #if defined(SIMDE_ARCH_RISCV) - __builtin_riscv_pause(); - #else - __asm__ __volatile__ ("nop" ::: "memory"); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_pause() (simde_mm_pause()) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sad_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); - r_.neon_u64 = vcombine_u64( - vpaddl_u32(vpaddl_u16(vget_low_u16(t))), - vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - uint16_t tmp = 0; - SIMDE_VECTORIZE_REDUCTION(+:tmp) - for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { - const size_t e = j + (i * 8); - tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); - } - r_.i64[i] = tmp; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_make( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { - e0, e1, e2, e3, - e4, e5, e6, e7, - e8, e9, e10, e11, - e12, e13, e14, e15}; - r_.neon_i8 = vld1q_s8(data); - #else - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i16 = vld1q_s16(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - r_.i16[4] = e4; - r_.i16[5] = e5; - r_.i16[6] = e6; - r_.i16[7] = e7; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si16 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ - HEDLEY_GCC_VERSION_CHECK(12,1,0)) - return _mm_loadu_si16(mem_addr); - #else - int16_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_x_mm_cvtsi16_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi32(e3, e2, e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; - r_.neon_i32 = vld1q_s32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); - #else - r_.i32[0] = e0; - r_.i32[1] = e1; - r_.i32[2] = e2; - r_.i32[3] = e3; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si32 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ - HEDLEY_GCC_VERSION_CHECK(12,1,0)) - return _mm_loadu_si32(mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m128i_private r_; - r_.neon_i32 = vsetq_lane_s32(* HEDLEY_REINTERPRET_CAST(const int32_t *, mem_addr), vdupq_n_s32(0), 0); - return simde__m128i_from_private(r_); - #else - int32_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_mm_cvtsi32_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_epi64(e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); - #else - r_.m64[0] = e0; - r_.m64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi64x (int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set_epi64x(e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; - r_.neon_i64 = vld1q_s64(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make(e0, e1); - #else - r_.i64[0] = e0; - r_.i64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si64 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - return _mm_loadu_si64(mem_addr); - #else - int64_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_mm_cvtsi64_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, - uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, - uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi8( - HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), - HEDLEY_STATIC_CAST(char, e11), 
HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), - HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), - HEDLEY_STATIC_CAST(char, e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { - e0, e1, e2, e3, - e4, e5, e6, e7, - e8, e9, e10, e11, - e12, e13, e14, e15}; - r_.neon_u8 = vld1q_u8(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); - #else - r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; - r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; - r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; - r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, - uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi16( - HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), - HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u16 = vld1q_u16(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); - #else - r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; - r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi32( - HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; - r_.neon_u32 = vld1q_u32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); - #else - r_.u32[0] = e0; - r_.u32[1] = e1; - r_.u32[2] = e2; - r_.u32[3] = e3; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; - r_.neon_u64 = vld1q_u64(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_make(e0, e1); - #else - r_.u64[0] = e0; - r_.u64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_sd (simde_float64 a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_sd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); - #else - return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_sd(a) simde_mm_set_sd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi8(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vdupq_n_s8(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi16 (int16_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi16(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vdupq_n_s16(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi32 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi32(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vdupq_n_s32(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi64x (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set1_epi64x(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vdupq_n_s64(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_epi64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - return simde_mm_set1_epi64x(a_.i64[0]); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu8 (uint8_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); - #else - return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu16 (uint16_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); - #else - return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu32 (uint32_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); - #else - return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu64 (uint64_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); - #else - return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_epi8( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - 
#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi32(e3, e2, e1, e0); - #else - return simde_mm_set_epi32(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_epi64(e1, e0); - #else - return simde_mm_set_epi64(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_pd(e1, e0); - #else - return simde_mm_set_pd(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setzero_pd (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_pd(); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); - #else - return simde_mm_castsi128_pd(simde_mm_setzero_si128()); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_pd() simde_mm_setzero_pd() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_undefined_pd (void) { - simde__m128d_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_pd(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_pd() simde_mm_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_undefined_si128 (void) { - simde__m128i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_si128(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_si128() (simde_mm_undefined_si128()) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_setone_pd (void) { - return simde_mm_castps_pd(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_setone_si128 (void) { - return simde_mm_castps_si128(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = 
simde__m128i_to_private(a); \ - simde__m128i_from_wasm_v128( \ - wasm_i32x4_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3)); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_epi32(a, imm8) \ - (__extension__ ({ \ - const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ - int32x4_t simde_mm_shuffle_epi32_r_; \ - simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ - vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - (simde_tmp_a_).i32, \ - (simde_tmp_a_).i32, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; - r_.f64[1] = ((imm8 & 2) == 0) ? 
b_.f64[0] : b_.f64[1]; - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ - simde__m128d_from_private((simde__m128d_private) { .f64 = \ - SIMDE_SHUFFLE_VECTOR_(64, 16, \ - simde__m128d_to_private(a).f64, \ - simde__m128d_to_private(b).f64, \ - (((imm8) ) & 1), \ - (((imm8) >> 1) & 1) + 2) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[i]; - } - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflehi_epi16(a, imm8) \ - (__extension__ ({ \ - int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ - simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ - wasm_i16x8_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = 
simde__m128i_to_private(a); - - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; - } - SIMDE_VECTORIZE - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) \ - simde__m128i_from_wasm_v128( \ - wasm_i16x8_shuffle( \ - simde__m128i_to_wasm_v128((a)), \ - wasm_i16x8_splat(0), \ - (((imm8) & 0x03) ), \ - (((imm8) & 0x0c) >> 2), \ - (((imm8) & 0x30) >> 4), \ - (((imm8) & 0xc0) >> 6), \ - 4, 5, 6, 7)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflelo_epi16(a, imm8) \ - (__extension__({ \ - int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ - simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), \ - 4, 5, 6, 7) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 15) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = (a_.u16 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? 
wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 31) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = (a_.u32 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 63) - return simde_mm_setzero_si128(); - - const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] << s; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsqrtq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_sqrt(a_.altivec_f64); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_sqrt) - r_.f64[0] = simde_math_sqrt(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~15) ? 15 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~31) ? 
31 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sra_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) - return _mm_sra_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - (((imm8) <= 0) ? 
\ - (a) : \ - simde__m128i_from_neon_i16( \ - ((imm8) > 15) ? \ - vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ - vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i32( \ - ((imm8) > 31) ? \ - vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ - vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sl(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 63))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i64( \ - ((imm8) > 63) ? \ - vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ - vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u16( \ - ((imm8) > 15) ? \ - vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ - vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u32( \ - ((imm8) > 31) ? \ - vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ - vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sr(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); - #else - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u64( \ - ((imm8) > 63) ? \ - vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ - vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store1_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); - #else - mem_addr[0] = a_.f64[0]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) - #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_sd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); - simde_memcpy(mem_addr, &v, sizeof(v)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); - simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde_float64 v = a_.f64[0]; - simde_memcpy(mem_addr, &v, sizeof(simde_float64)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void - simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeh_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - *mem_addr = a_.f64[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int64_t tmp; - - /* memcpy to prevent aliasing, tmp because we can't take the - * address of a vector element. */ - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - tmp = vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - tmp = vec_extract(a_.altivec_i64, 0); - #else - tmp = a_.i64[0]; - #endif - - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_pd(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 tmp; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - tmp = vgetq_lane_f64(a_.neon_f64, 0); - #else - tmp = a_.f64[0]; - #endif - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storer_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #else - mem_addr[0] = a_.f64[1]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si16(mem_addr, a); - #else - int16_t val = simde_x_mm_cvtsi128_si16(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si32(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); - #else - int32_t val = simde_mm_cvtsi128_si32(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si64(mem_addr, a); - #else - int64_t val = simde_mm_cvtsi128_si64(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_pd(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_si128(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-void -simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_si32(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_s32(mem_addr, vdupq_n_s32(a), 0); - #else - *mem_addr = a; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) - _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s64(mem_addr, vdup_n_s64(a)); - #else - *mem_addr = a; - #endif -} -#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) - #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] - b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
[Remaining deletions of the bundled SIMDE single header, collapsed for brevity:
 - rest of simde/x86/sse2.h: saturating subtracts (_mm_sub_si64, _mm_subs_epi8/epi16/epu8/epu16),
   unordered scalar-double comparisons (_mm_ucomieq/ucomige/ucomigt/ucomile/ucomilt/ucomineq_sd),
   _mm_lfence / _mm_mfence, the unpackhi/unpacklo family for epi8/epi16/epi32/epi64 and pd,
   _mm_xor_si128, the simde_x_mm_negate_pd / simde_x_mm_not_si128 helpers, and _MM_SHUFFLE2;
 - all of simde/x86/sse3.h: the simde_x_mm_deinterleave{even,odd} helpers for epi16/epi32/ps/pd,
   _mm_addsub_pd/ps, _mm_hadd_pd/ps, _mm_hsub_pd/ps, _mm_lddqu_si128, _mm_loaddup_pd,
   _mm_movedup_pd, _mm_movehdup_ps, and _mm_moveldup_ps;
 - start of the SSSE3 section: _mm_abs_epi8/epi16/epi32 and _mm_abs_pi8/pi16/pi32,
   _mm_alignr_epi8/pi8, _mm_shuffle_epi8/pi8, _mm_hadd(s)_epi16/epi32/pi16/pi32,
   _mm_hsub(s)_epi16/epi32/pi16/pi32, _mm_maddubs_epi16/pi16, _mm_mulhrs_epi16, and the
   opening of _mm_mulhrs_pi16.
 These vendored x86-intrinsic emulation headers are deleted along with the rest of the simde
 tree, which is no longer included after the Armadillo port.]
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow = vrshrn_n_s32(mul, 15); - - /* Join together */ - r_.neon_i16 = narrow; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); - uint8x16_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s8(b_.neon_i8); - #else - bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); - #endif - bnz_mask = vmvnq_u8(bnz_mask); - - r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); - simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); - uint16x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s16(b_.neon_i16); - #else - bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); - #endif - bnz_mask = vmvnq_u16(bnz_mask); - - r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); - simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); - uint32x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s32(b_.neon_i32); - #else - bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); - #endif - bnz_mask = vmvnq_u32(bnz_mask); - - r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); - simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); - uint8x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s8(b_.neon_i8); - #else - bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); - #endif - bnz_mask = vmvn_u8(bnz_mask); - - r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); - uint16x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s16(b_.neon_i16); - #else - bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); - #endif - bnz_mask = vmvn_u16(bnz_mask); - - r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); - uint32x2_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s32(b_.neon_i32); - #else - bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); - #endif - bnz_mask = vmvn_u32(bnz_mask); - - r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/ssse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) -# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_epi16(a, b, imm8) \ - (__extension__ ({ \ - simde__m128i_private \ - simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ - simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ - simde_mm_blend_epi16_r_; \ - \ - simde_mm_blend_epi16_r_.i16 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 16, 16, \ - simde_mm_blend_epi16_a_.i16, \ - simde_mm_blend_epi16_b_.i16, \ - ((imm8) & (1 << 0)) ? 8 : 0, \ - ((imm8) & (1 << 1)) ? 9 : 1, \ - ((imm8) & (1 << 2)) ? 10 : 2, \ - ((imm8) & (1 << 3)) ? 11 : 3, \ - ((imm8) & (1 << 4)) ? 12 : 4, \ - ((imm8) & (1 << 5)) ? 13 : 5, \ - ((imm8) & (1 << 6)) ? 14 : 6, \ - ((imm8) & (1 << 7)) ? 15 : 7 \ - ); \ - \ - simde__m128i_from_private(simde_mm_blend_epi16_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_epi16 - #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; - } - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_pd(a, b, imm8) \ - (__extension__ ({ \ - simde__m128d_private \ - simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ - simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ - simde_mm_blend_pd_r_; \ - \ - simde_mm_blend_pd_r_.f64 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 64, 16, \ - simde_mm_blend_pd_a_.f64, \ - simde_mm_blend_pd_b_.f64, \ - ((imm8) & (1 << 0)) ? 2 : 0, \ - ((imm8) & (1 << 1)) ? 3 : 1 \ - ); \ - \ - simde__m128d_from_private(simde_mm_blend_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_pd - #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_ps(a, b, imm8) \ - (__extension__ ({ \ - simde__m128_private \ - simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ - simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ - simde_mm_blend_ps_r_; \ - \ - simde_mm_blend_ps_r_.f32 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 32, 16, \ - simde_mm_blend_ps_a_.f32, \ - simde_mm_blend_ps_b_.f32, \ - ((imm8) & (1 << 0)) ? 4 : 0, \ - ((imm8) & (1 << 1)) ? 5 : 1, \ - ((imm8) & (1 << 2)) ? 6 : 2, \ - ((imm8) & (1 << 3)) ? 
7 : 3 \ - ); \ - \ - simde__m128_from_private(simde_mm_blend_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_ps - #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_epi8(a, b, mask); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); - return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Use a signed shift right to create a mask with the sign bit */ - mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); - r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); - #else - mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; - #endif - - r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int8_t m = mask_.i8[i] >> 7; - r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_epi8 - #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE2_NATIVE) - mask = simde_mm_srai_epi16(mask, 15); - return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); - r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i16 = mask_.i16 < z; - #else - mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; - #endif - - r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int16_t m = mask_.i16[i] >> 15; - r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; - mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); - #else - mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; - #endif - - r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - int32_t m = mask_.i32[i] >> 31; - r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i64) z = { 0, 0 }; - mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); - #else - mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; - #endif - - r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - int64_t m = mask_.i64[i] >> 63; - r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_pd - #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_ps - #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_pd (simde__m128d a, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - /* For architectures which lack a current direction SIMD instruction. */ - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndiq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyint) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_nearbyint(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndaq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_roundeven) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_roundeven(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndmq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_floor(a_.f64[i]); - } - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndpq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); - #elif defined(simde_math_ceil) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_ceil(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_trunc(a_.f64[i]); - } - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_pd - #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_pd - #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ps - #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_sd - #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_ss(a, 
b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ss - #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cmpeq_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ - uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); - uint32x4_t swapped = vrev64q_u32(cmp); - r_.neon_u32 = vandq_u32(cmp, swapped); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpeq_epi64 - #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_i16 = s16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, 0, -1, 1, -1, 2, -1, 3, - -1, 4, -1, 5, -1, 6, -1, 7)); - r_.i16 >>= 8; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi16 - #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) 
- __m128i tmp = _mm_unpacklo_epi8(a, a); - tmp = _mm_unpacklo_epi16(tmp, tmp); - return _mm_srai_epi32(tmp, 24); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ - r_.neon_i32 = s32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, 0, -1, -1, -1, 1, - -1, -1, -1, 2, -1, -1, -1, 3)); - r_.i32 >>= 24; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi32 - #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); - r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - /* Disabled on x86 due to lack of 64-bit arithmetic shift until - * until AVX-512 (at which point we would be using the native - * _mm_cvtepi_epi64 anyways). 
*/ - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, -1, -1, -1, -1, 0, - -1, -1, -1, -1, -1, -1, -1, 1)); - r_.i64 >>= 56; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi64 - #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_u16 = u16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 16, 1, 17, 2, 18, 3, 19, - 4, 20, 5, 21, 6, 22, 7, 23)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi16 - #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi32(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ - r_.neon_u32 = u32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 
18, 19, 1, 21, 22, 23, - 2, 25, 26, 27, 3, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi32 - #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi64(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 18, 19, 20, 21, 22, 23, - 1, 25, 26, 27, 28, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi64 - #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); - r_.i32 >>= 16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi32 - #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 1, 11, 2, 13, 3, 15)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi32 - #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 10, 11, - 1, 13, 14, 15)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi64 - #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, - 8, 9, 10, 0, - 12, 13, 14, 1)); - r_.i64 >>= 48; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi64 - #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i tmp = _mm_shuffle_epi32(a, 0x50); - tmp = _mm_srai_epi32(tmp, 31); - tmp = _mm_shuffle_epi32(tmp, 0xed); - return _mm_unpacklo_epi32(a, tmp); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); - r_.i64 >>= 32; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi32_epi64 - #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); - #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u32) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu32_epi64 - #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - - switch (imm8) { - case 0xff: - r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); - break; - case 0x13: - r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); - break; - default: - { /* imm8 is a compile-time constant, so this all becomes just a load */ - uint64_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - - r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); - - { - uint64_t mask_data[] = { - (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - break; - } - #else - simde_float64 sum = SIMDE_FLOAT64_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; - } - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_pd - #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - - switch (imm8) { - case 0xff: - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - case 0x7f: - r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - default: - { - { - uint32_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - - { - uint32_t mask_data[] = { - (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - } - break; - } - #else - simde_float32 sum = SIMDE_FLOAT32_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); - } - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(HEDLEY_MCST_LCC_VERSION) - #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ - SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ - _mm_dp_ps((a), (b), (imm8)); \ - SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ - })) - #else - #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_ps - #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) -#endif - -#if defined(simde_mm_extract_epi8) -# undef simde_mm_extract_epi8 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_mm_extract_epi8 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i8, imm8); - #else - return a_.i8[imm8 & 15]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) -# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi8 - #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) -#endif - -#if defined(simde_mm_extract_epi32) -# undef simde_mm_extract_epi32 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i32, imm8); - #else - return a_.i32[imm8 & 3]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi32 - #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) -#endif - -#if defined(simde_mm_extract_epi64) -# undef simde_mm_extract_epi64 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_extract_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i64, imm8); - #else - return a_.i64[imm8 & 1]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) -#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_extract_epi64 - #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) -#endif - -#if defined(simde_mm_extract_ps) -# undef simde_mm_extract_ps -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128_private - a_ = simde__m128_to_private(a); - - return a_.i32[imm8 & 3]; -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_ps - #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_pd - #define _mm_floor_pd(a) simde_mm_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ps - #define _mm_floor_ps(a) simde_mm_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_floor) - r_.f64[0] = simde_math_floor(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_sd - #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_floor_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_floorf) - r_.f32[0] = simde_math_floorf(b_.f32[0]); - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ss - #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - /* clang-3.8 returns an incompatible type, so we need the cast. MSVC - * can't handle the cast ("error C2440: 'type cast': cannot convert - * from '__m128i' to '__m128i'"). */ - #if defined(__clang__) - #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) - #else - #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi8 - #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(__clang__) - #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) - #else - #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi32 - #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - #if defined(SIMDE_BUG_GCC_94482) - simde__m128i_private - a_ = simde__m128i_to_private(a); - - switch(imm8) { - case 0: - return simde_mm_set_epi64x(a_.i64[1], i); - break; - case 1: - return simde_mm_set_epi64x(i, a_.i64[0]); - break; - default: - HEDLEY_UNREACHABLE(); - break; - } - #else - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i64[imm8] = i; - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_insert_epi64 - #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - float tmp1_ = b_.f32[(imm8 >> 6) & 3]; - a_.f32[(imm8 >> 4) & 3] = tmp1_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_ps - #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi8(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi8 - #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi32(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi32 - #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_add_epi16(b, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu16 - #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu32 - #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi8 - #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi32 - #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu16 - #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu32 - #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_minpos_epu16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_minpos_epu16(a); - #else - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()), - a_ = simde__m128i_to_private(a); - - r_.u16[0] = UINT16_MAX; - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - if (a_.u16[i] < r_.u16[0]) { - r_.u16[0] = a_.u16[i]; - r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); - } - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_minpos_epu16 - #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - const int a_offset = imm8 & 4; - const int b_offset = (imm8 & 3) << 2; - -#if defined(simde_math_abs) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { - r_.u16[i] = - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) -# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mpsadbw_epu8 - #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mul_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // vmull_s32 upcasts instead of masking, so we downcast. 
- int32x2_t a_lo = vmovn_s64(a_.neon_i64); - int32x2_t b_lo = vmovn_s64(b_.neon_i64); - r_.neon_i64 = vmull_s32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make( - wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), - wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = - HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * - HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mul_epi32 - #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mullo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mullo_epi32 - #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 * b_.u32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] * b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_packus_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i max = _mm_set1_epi32(UINT16_MAX); - const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); - const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); - return - _mm_packs_epi32( - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) - ); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); - #else - r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = - 
vcombine_u16( - vqmovun_s32(a_.neon_i32), - vqmovun_s32(b_.neon_i32) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - - v &= ~(v >> 31); - v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_packus_epi32 - #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyint) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f64[0] = simde_math_nearbyint(b_.f64[0]); - break; - #endif - - #if defined(simde_math_floor) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f64[0] = simde_math_floor(b_.f64[0]); - break; - #endif - - #if defined(simde_math_ceil) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f64[0] = simde_math_ceil(b_.f64[0]); - break; - #endif - - #if defined(simde_math_trunc) - case SIMDE_MM_FROUND_TO_ZERO: - r_.f64[0] = simde_math_trunc(b_.f64[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) -# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_sd - #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128_private - r_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyintf) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_floorf) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f32[0] = simde_math_floorf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_ceilf) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f32[0] = simde_math_ceilf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_truncf) - case SIMDE_MM_FROUND_TO_ZERO: - 
r_.f32[0] = simde_math_truncf(b_.f32[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_ss - #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_load) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - return __builtin_nontemporal_load(mem_addr); - #else - return simde_mm_load_si128(mem_addr); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_stream_load_si128 - #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_ones (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_ones(a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; - #else - int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(&:r_) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r_ &= a_.i32f[i]; - } - - r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_ones - #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_zeros(a, mask); - #else - simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; - #else - int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(|:r_) - for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { - r_ |= tmp_.i32f[i]; - } - - r = !r_; - #endif - - return r; - #endif -} 
-#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_zeros - #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_mix_ones_zeros(a, mask); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); - int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); - return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); - long long c0 = wasm_i64x2_extract_lane(m, 0); - long long c1 = wasm_i64x2_extract_lane(m, 1); - long long ones = c0 | c1; - long long zeros = ~(c0 & c1); - return ones && zeros; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) - if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) - return 1; - - return 0; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_mix_ones_zeros - #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #else - int_fast32_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= ~a_.i32f[i] & b_.i32f[i]; - } - - return HEDLEY_STATIC_CAST(int, !r); - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_si128 - #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testnzc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); - int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !( !(vgetq_lane_s64(s641, 0) || vgetq_lane_s64(s641, 1)) \ - || !(vgetq_lane_s64(s640, 0) || vgetq_lane_s64(s640, 1)) ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ - && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) - return 1; - } - - return 0; - #endif - #endif -} -#if 
defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_si128 - #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testz_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #elif defined(SIMDE_HAVE_INT128_) - if ((a_.u128[0] & b_.u128[0]) == 0) { - return 1; - } - return 0; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if ((a_.u64[i] & b_.u64[i]) > 0) - return 0; - } - #endif - - return 1; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_si128 - #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_1_H) */ -/* :: End simde/x86/sse4.1.h :: */ - -#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) - #include <arm_acle.h> -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS - #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS - #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS - #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS - #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY - #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES - #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH - #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED - #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY - #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY - #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT - #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT - #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK - #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK -#else - #define SIMDE_SIDD_UBYTE_OPS 0x00 - #define SIMDE_SIDD_UWORD_OPS 0x01 - #define SIMDE_SIDD_SBYTE_OPS 0x02 - #define SIMDE_SIDD_SWORD_OPS 0x03 - #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 - #define SIMDE_SIDD_CMP_RANGES 0x04 - #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 - #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c - #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 - #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 - #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 - #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 - #define SIMDE_SIDD_BIT_MASK 0x00 - #define SIMDE_SIDD_UNIT_MASK 0x40 -#endif - -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) - #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS - #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS - #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS - #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS - #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY - #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES - #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH - #define 
_SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED - #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY - #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY - #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY - #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY - #define _SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT - #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT - #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK - #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ - _mm_cmpestrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrs - #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 
16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ - _mm_cmpestrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrz - #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_cmpgt_epi64(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://stackoverflow.com/a/65175746/501126 */ - __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); - r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); - return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://stackoverflow.com/a/65223269/501126 */ - r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpgt_epi64 - #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_8_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 8) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i8[i]) - a_invalid = 1; - } - return a_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_16_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 16) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i16[i]) - a_invalid = 1; - } - return a_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrs(a, b, imm8) \ - _mm_cmpistrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrs(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? 
simde_mm_cmpistrs_16_((a)) \ - : simde_mm_cmpistrs_8_((a))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrs - #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_8_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 8) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i8[i]) - b_invalid = 1; - } - return b_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_16_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 16) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i16[i]) - b_invalid = 1; - } - return b_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrz(a, b, imm8) \ - _mm_cmpistrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrz(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? simde_mm_cmpistrz_16_((b)) \ - : simde_mm_cmpistrz_8_((b))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrz - #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u8(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cb(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc ^= v; - for(int bit = 0 ; bit < 8 ; bit++) { - if (crc & 1) - crc = (crc >> 1) ^ UINT32_C(0x82f63b78); - else - crc = (crc >> 1); - } - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u16(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32ch(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u8(crc, v & 0xff); - crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u32(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cw(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u16(crc, v & 0xffff); - crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) - return _mm_crc32_u64(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return 
__crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); - #else - uint64_t crc = prevcrc; - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_2_H) */ -/* :: End simde/x86/sse4.2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; - SIMDE_ALIGN_TO_32 simde__m128 m128[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256 n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} 
simde__m256_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; - SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256d n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256d_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 
SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_32 simde_float16 f16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 simde_float16 f16[16]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float16 f16[16]; - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; - SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256i n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256i_private; - -#if defined(SIMDE_X86_AVX_NATIVE) - typedef __m256 simde__m256; - typedef __m256i simde__m256i; - typedef __m256d simde__m256d; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; -#else - typedef simde__m256_private simde__m256; - typedef simde__m256i_private simde__m256i; - typedef simde__m256d_private simde__m256d; -#endif - -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) - typedef simde__m256 __m256; - typedef simde__m256i __m256i; - typedef simde__m256d __m256d; - #else - #undef __m256 - #define __m256 simde__m256 - #undef __m256i - #define __m256i simde__m256i - #undef __m256d - #define __m256d simde__m256d - #endif -#endif - -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); 
-HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde__m256_from_private(simde__m256_private v) { - simde__m256 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256_private -simde__m256_to_private(simde__m256 v) { - simde__m256_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde__m256i_from_private(simde__m256i_private v) { - simde__m256i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i_private -simde__m256i_to_private(simde__m256i v) { - simde__m256i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde__m256d_from_private(simde__m256d_private v) { - simde__m256d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d_private -simde__m256d_to_private(simde__m256d v) { - simde__m256d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_CMP_EQ_OQ 0 -#define SIMDE_CMP_LT_OS 1 -#define SIMDE_CMP_LE_OS 2 -#define SIMDE_CMP_UNORD_Q 3 -#define SIMDE_CMP_NEQ_UQ 4 -#define SIMDE_CMP_NLT_US 5 -#define SIMDE_CMP_NLE_US 6 -#define SIMDE_CMP_ORD_Q 7 -#define SIMDE_CMP_EQ_UQ 8 -#define SIMDE_CMP_NGE_US 9 -#define SIMDE_CMP_NGT_US 10 -#define SIMDE_CMP_FALSE_OQ 11 -#define SIMDE_CMP_NEQ_OQ 12 -#define SIMDE_CMP_GE_OS 13 -#define SIMDE_CMP_GT_OS 14 -#define SIMDE_CMP_TRUE_UQ 15 -#define SIMDE_CMP_EQ_OS 16 -#define SIMDE_CMP_LT_OQ 17 -#define SIMDE_CMP_LE_OQ 18 -#define SIMDE_CMP_UNORD_S 19 -#define SIMDE_CMP_NEQ_US 20 -#define SIMDE_CMP_NLT_UQ 21 -#define SIMDE_CMP_NLE_UQ 22 -#define SIMDE_CMP_ORD_S 23 -#define SIMDE_CMP_EQ_US 24 -#define SIMDE_CMP_NGE_UQ 25 -#define SIMDE_CMP_NGT_UQ 26 -#define SIMDE_CMP_FALSE_OS 27 -#define SIMDE_CMP_NEQ_OS 28 -#define SIMDE_CMP_GE_OQ 29 -#define SIMDE_CMP_GT_OQ 30 -#define SIMDE_CMP_TRUE_US 31 - -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) -#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ -#define _CMP_LT_OS SIMDE_CMP_LT_OS -#define _CMP_LE_OS SIMDE_CMP_LE_OS -#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q -#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ -#define _CMP_NLT_US SIMDE_CMP_NLT_US -#define _CMP_NLE_US SIMDE_CMP_NLE_US -#define _CMP_ORD_Q SIMDE_CMP_ORD_Q -#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ -#define _CMP_NGE_US SIMDE_CMP_NGE_US -#define _CMP_NGT_US SIMDE_CMP_NGT_US -#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ -#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ -#define _CMP_GE_OS SIMDE_CMP_GE_OS -#define _CMP_GT_OS SIMDE_CMP_GT_OS -#define _CMP_TRUE_UQ 
SIMDE_CMP_TRUE_UQ -#define _CMP_EQ_OS SIMDE_CMP_EQ_OS -#define _CMP_LT_OQ SIMDE_CMP_LT_OQ -#define _CMP_LE_OQ SIMDE_CMP_LE_OQ -#define _CMP_UNORD_S SIMDE_CMP_UNORD_S -#define _CMP_NEQ_US SIMDE_CMP_NEQ_US -#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ -#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ -#define _CMP_ORD_S SIMDE_CMP_ORD_S -#define _CMP_EQ_US SIMDE_CMP_EQ_US -#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ -#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ -#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS -#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS -#define _CMP_GE_OQ SIMDE_CMP_GE_OQ -#define _CMP_GT_OQ SIMDE_CMP_GT_OQ -#define _CMP_TRUE_US SIMDE_CMP_TRUE_US -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castps_pd (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps_pd(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps_pd - #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castps_si256 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps_si256(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps_si256 - #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castsi256_pd (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_pd(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_pd - #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castsi256_ps (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_ps(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_ps - #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castpd_ps (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd_ps(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd_ps - #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castpd_si256 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd_si256(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd_si256 - #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setzero_si256 (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_si256(); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_setzero_si128(); - r_.m128i[1] = simde_mm_setzero_si128(); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = 0; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_si256 - #define _mm256_setzero_si256() simde_mm256_setzero_si256() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 
-simde_mm256_setzero_ps (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_ps(); - #else - return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_ps - #define _mm256_setzero_ps() simde_mm256_setzero_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setzero_pd (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_pd(); - #else - return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_pd - #define _mm256_setzero_pd() simde_mm256_setzero_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_not_ps(simde__m256 a) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); - r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm256_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_ps(a, b, mask); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - mask_ = simde__m256_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); - r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_not_pd(simde__m256d a) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = ~a_.i64; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); - r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ~(a_.i64[i]); - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm256_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. 
- * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_pd(a, b, mask); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - mask_ = simde__m256d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); - r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_setone_si256 (void) { - simde__m256i_private r_; - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - __typeof__(r_.i32f) rv = { 0, }; - r_.i32f = ~rv; -#elif defined(SIMDE_X86_AVX2_NATIVE) - __m256i t = _mm256_setzero_si256(); - r_.n = _mm256_cmpeq_epi32(t, t); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - } -#endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_setone_ps (void) { - return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_setone_pd (void) { - return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, - int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, - int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi8( - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16); - #else - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - r_.i8[16] = e16; - r_.i8[17] = e17; - r_.i8[18] = e18; - r_.i8[19] = e19; - r_.i8[20] = e20; - r_.i8[21] = e21; - r_.i8[22] = e22; - r_.i8[23] = e23; - r_.i8[24] = e24; - r_.i8[25] = e25; - r_.i8[26] = e26; - r_.i8[27] = e27; - r_.i8[28] = e28; - r_.i8[29] = e29; - r_.i8[30] = e30; - r_.i8[31] = e31; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi8 - #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ 
- simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, - int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); - #else - r_.i16[ 0] = e0; - r_.i16[ 1] = e1; - r_.i16[ 2] = e2; - r_.i16[ 3] = e3; - r_.i16[ 4] = e4; - r_.i16[ 5] = e5; - r_.i16[ 6] = e6; - r_.i16[ 7] = e7; - r_.i16[ 8] = e8; - r_.i16[ 9] = e9; - r_.i16[10] = e10; - r_.i16[11] = e11; - r_.i16[12] = e12; - r_.i16[13] = e13; - r_.i16[14] = e14; - r_.i16[15] = e15; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi16 - #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, - int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); - #else - r_.i32[ 0] = e0; - r_.i32[ 1] = e1; - r_.i32[ 2] = e2; - r_.i32[ 3] = e3; - r_.i32[ 4] = e4; - r_.i32[ 5] = e5; - r_.i32[ 6] = e6; - r_.i32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi32 - #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi64x(e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi64x(e1, e0); - r_.m128i[1] = simde_mm_set_epi64x(e3, e2); - #else - r_.i64[0] = e0; - r_.i64[1] = e1; - r_.i64[2] = e2; - r_.i64[3] = e3; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi64x - #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, - uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, - uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, - uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, - uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, - uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, - uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m256i_private r_; - - r_.u8[ 0] = e0; - 
r_.u8[ 1] = e1; - r_.u8[ 2] = e2; - r_.u8[ 3] = e3; - r_.u8[ 4] = e4; - r_.u8[ 5] = e5; - r_.u8[ 6] = e6; - r_.u8[ 7] = e7; - r_.u8[ 8] = e8; - r_.u8[ 9] = e9; - r_.u8[10] = e10; - r_.u8[11] = e11; - r_.u8[12] = e12; - r_.u8[13] = e13; - r_.u8[14] = e14; - r_.u8[15] = e15; - r_.u8[16] = e16; - r_.u8[17] = e17; - r_.u8[18] = e18; - r_.u8[19] = e19; - r_.u8[20] = e20; - r_.u8[21] = e21; - r_.u8[22] = e22; - r_.u8[23] = e23; - r_.u8[24] = e24; - r_.u8[25] = e25; - r_.u8[26] = e26; - r_.u8[27] = e27; - r_.u8[28] = e28; - r_.u8[29] = e29; - r_.u8[30] = e30; - r_.u8[31] = e31; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, - uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, - uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, - uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m256i_private r_; - - r_.u16[ 0] = e0; - r_.u16[ 1] = e1; - r_.u16[ 2] = e2; - r_.u16[ 3] = e3; - r_.u16[ 4] = e4; - r_.u16[ 5] = e5; - r_.u16[ 6] = e6; - r_.u16[ 7] = e7; - r_.u16[ 8] = e8; - r_.u16[ 9] = e9; - r_.u16[10] = e10; - r_.u16[11] = e11; - r_.u16[12] = e12; - r_.u16[13] = e13; - r_.u16[14] = e14; - r_.u16[15] = e15; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, - uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), - HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); - #else - r_.u32[ 0] = e0; - r_.u32[ 1] = e1; - r_.u32[ 2] = e2; - r_.u32[ 3] = e3; - r_.u32[ 4] = e4; - r_.u32[ 5] = e5; - r_.u32[ 6] = e6; - r_.u32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { - simde__m256i_private r_; - - r_.u64[0] = e0; - r_.u64[1] = e1; - r_.u64[2] = e2; - r_.u64[3] = e3; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); - r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - r_.f32[4] = e4; - r_.f32[5] = e5; - r_.f32[6] = e6; - r_.f32[7] = e7; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_ps - #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ 
simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_pd(e3, e2, e1, e0); - #else - simde__m256d_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_set_pd(e1, e0); - r_.m128d[1] = simde_mm_set_pd(e3, e2); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - r_.f64[2] = e2; - r_.f64[3] = e3; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_pd - #define _mm256_set_pd(e3, e2, e1, e0) \ - simde_mm256_set_pd(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); - #else - simde__m256_private r_; - simde__m128_private - e1_ = simde__m128_to_private(e1), - e0_ = simde__m128_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128_private[0] = e0_; - r_.m128_private[1] = e1_; - #elif defined(SIMDE_HAVE_INT128_) - r_.i128[0] = e0_.i128[0]; - r_.i128[1] = e1_.i128[0]; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128 - #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); - #else - simde__m256d_private r_; - simde__m128d_private - e1_ = simde__m128d_to_private(e1), - e0_ = simde__m128d_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d_private[0] = e0_; - r_.m128d_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128d - #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); - #else - simde__m256i_private r_; - simde__m128i_private - e1_ = simde__m128i_to_private(e1), - e0_ = simde__m128i_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i_private[0] = e0_; - r_.m128i_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128i - #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi8(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi8(a); - r_.m128i[1] = simde_mm_set1_epi8(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - #endif - - return 
simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi8 - #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi16 (int16_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi16(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi16(a); - r_.m128i[1] = simde_mm_set1_epi16(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi16 - #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi32 (int32_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi32(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi32(a); - r_.m128i[1] = simde_mm_set1_epi32(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi32 - #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi64x (int64_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi64x(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi64x(a); - r_.m128i[1] = simde_mm_set1_epi64x(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi64x - #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set1_ps (simde_float32 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_ps(a); - #else - simde__m256_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_set1_ps(a); - r_.m128[1] = simde_mm_set1_ps(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_ps - #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_pd(a); - #else - simde__m256d_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_set1_pd(a); - r_.m128d[1] = simde_mm_set1_pd(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_pd - #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i16[i] = a_.i16[2 * i]; - r_.i16[i + quarter_point] = b_.i16[2 * i]; - r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; - r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i16[i] = a_.i16[2 * i + 1]; - r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; - r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; - r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i32[i] = a_.i32[2 * i]; - r_.i32[i + quarter_point] = b_.i32[2 * i]; - r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; - r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - const size_t quarter_point = (sizeof(r_.i32) / 
sizeof(r_.i32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i32[i] = a_.i32[2 * i + 1]; - r_.i32[i + quarter_point] = b_.i32[2 * i + 1]; - r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; - r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f32[i] = a_.f32[2 * i]; - r_.f32[i + quarter_point] = b_.f32[2 * i]; - r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; - r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f32[i] = a_.f32[2 * i + 1]; - r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; - r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; - r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f64[i] = a_.f64[2 * i]; - r_.f64[i + quarter_point] = b_.f64[2 * i]; - r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; - r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f64[i] = a_.f64[2 * i + 1]; - r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; - r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; - r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_abs_ps(simde__m256 a) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_abs_pd(simde__m256d a) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_add_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_add_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_ps - #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hadd_ps(a, b); - #else - return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_ps - #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_add_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_add_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] + b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_pd - #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hadd_pd(a, b); - #else - return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_pd - #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_addsub_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; - r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_addsub_ps - #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_addsub_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; - r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_addsub_pd - #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_and_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_and_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_ps - #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_and_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_and_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; 
i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_pd - #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_andnot_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_ps - #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_andnot_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_pd - #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ - simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_ps - #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; - } - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_pd(a, b, imm8) \ - simde_mm256_set_m128d( \ - simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ - simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_pd - #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_ps(a, b, mask); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - mask_ = simde__m256_to_private(mask); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); - r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blendv_ps - #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_pd(a, b, mask); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - mask_ = simde__m256d_to_private(mask); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); - r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blendv_pd - #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_pd(mem_addr); - #else - simde__m256d_private r_; - - simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); - r_.m128d[0] = tmp; - r_.m128d[1] = tmp; - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_pd - #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_ps(mem_addr); - #else - simde__m256_private r_; - - simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); - r_.m128[0] = tmp; - r_.m128[1] = tmp; - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_ps - #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcast_sd (simde_float64 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_sd(a); - #else - return simde_mm256_set1_pd(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_sd - #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_broadcast_ss (simde_float32 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_broadcast_ss(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); - #else - return simde_mm_set1_ps(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcast_ss - #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcast_ss (simde_float32 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_ss(a); - #else - return simde_mm256_set1_ps(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_ss - #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castpd128_pd256 (simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd128_pd256(a); - #else - simde__m256d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - r_.m128d_private[0] = a_; - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd128_pd256 - #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm256_castpd256_pd128 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd256_pd128(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - return a_.m128d[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd256_pd128 - #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castps128_ps256 (simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps128_ps256(a); - #else - simde__m256_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - r_.m128_private[0] = a_; - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps128_ps256 - #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_castps256_ps128 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps256_ps128(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - return a_.m128[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps256_ps128 - #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castsi128_si256 (simde__m128i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi128_si256(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - r_.m128i_private[0] = a_; - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi128_si256 - #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_castsi256_si128 (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_si128(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_si128 - #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_round_ps (simde__m256 a, const int rounding) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyintf) - case SIMDE_MM_FROUND_CUR_DIRECTION: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_roundf) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_floorf) - case SIMDE_MM_FROUND_TO_NEG_INF: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_ceilf) - case SIMDE_MM_FROUND_TO_POS_INF: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_truncf) - case SIMDE_MM_FROUND_TO_ZERO: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256_private \ - 
[... continuation of the deleted vendored SIMDE AVX header, removed verbatim and collapsed here: the remainder of the simde_mm256_round_ps fallback macro, followed by the portable implementations of simde_mm256_round_pd, simde_mm256_ceil_pd/_ps, simde_mm_cmp_pd/_ps/_sd/_ss, simde_mm256_cmp_pd/_ps, simde_x_mm256_copysign_ps/_pd, the simde_mm256_cvt*/cvtt* conversions, simde_mm256_div_ps/_pd, simde_mm256_extractf128_* and simde_mm256_insertf128_*, simde_mm256_insert_epi8/16/32/64 and simde_mm256_extract_epi32/64, simde_mm256_dp_ps, the simde_mm256_load/loadu/lddqu/loadu2 family, simde_mm_maskload_*/simde_mm256_maskload_* and simde_mm_maskstore_*/simde_mm256_maskstore_*, simde_mm256_min_ps/_pd and simde_mm256_max_ps/_pd, simde_mm256_movedup_pd, simde_mm256_movehdup_ps, and the opening of simde_mm256_moveldup_ps, each with its SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES #define block. The deletion continues beyond this excerpt. ...]
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_moveldup_ps - #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_ps(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r |= (a_.u32[i] >> 31) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_ps - #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_pd(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_pd - #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_ps - #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_pd - #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_or_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; 
- #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_ps - #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_or_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] | b_.u64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_pd - #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute_ps (simde__m256 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_ps - #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_pd - #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permute_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permute_ps - #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permute_pd (simde__m128d a, const int imm8) - 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permute_pd - #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_permutevar_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make( - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[b_.i32[i] & 3]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permutevar_ps - #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_permutevar_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make( - (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), - (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permutevar_pd - #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_permutevar_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - simde__m256i_private b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar_ps - #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_permutevar_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - simde__m256i_private b_ = 
simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar_pd - #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); - r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_ps - #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); - r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_pd - #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); - r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_si256 - #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_rcp_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rcp_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); - r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_rcp_ps - #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_rsqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rsqrt_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_rsqrt_ps - #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi8 ( - int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi8( - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi8( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15, - e16, e17, e18, e19, e20, e21, e22, e23, - e24, e25, e26, e27, e28, e29, e30, e31); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi8 - #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi16 ( - int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi16( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi16( - e0, e1, e2, e3, e4, e5, e6, e7, - 
e8, e9, e10, e11, e12, e13, e14, e15); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi16 - #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi32 ( - int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi32 - #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi64x(e3, e2, e1, e0); - #else - return simde_mm256_set_epi64x(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi64x - #define _mm256_setr_epi64x(e3, e2, e1, e0) \ - simde_mm256_setr_epi64x(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_setr_ps ( - simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_ps - #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_pd(e3, e2, e1, e0); - #else - return simde_mm256_set_pd(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_pd - #define _mm256_setr_pd(e3, e2, e1, e0) \ - simde_mm256_setr_pd(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return _mm256_setr_m128(lo, hi); - #else - return simde_mm256_set_m128(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128 - #define _mm256_setr_m128(lo, hi) \ - simde_mm256_setr_m128(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return _mm256_setr_m128d(lo, hi); - #else - return simde_mm256_set_m128d(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128d - #define _mm256_setr_m128d(lo, hi) \ - simde_mm256_setr_m128d(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return 
_mm256_setr_m128i(lo, hi); - #else - return simde_mm256_set_m128i(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128i - #define _mm256_setr_m128i(lo, hi) \ - simde_mm256_setr_m128i(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; - r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; - r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; - r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; - r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_shuffle_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ - simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm256_shuffle_ps(a, b, imm8) \ - SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ - (((imm8) >> 0) & 3) + 0, \ - (((imm8) >> 2) & 3) + 0, \ - (((imm8) >> 4) & 3) + 8, \ - (((imm8) >> 6) & 3) + 8, \ - (((imm8) >> 0) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 12, \ - (((imm8) >> 6) & 3) + 12) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_ps - #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - r_.f64[0] = a_.f64[((imm8 ) & 1) ]; - r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; - r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; - r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_shuffle_pd(a, b, imm8) \ - simde_mm256_set_m128d( \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 2) & 3), \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 0) & 3)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm256_shuffle_pd(a, b, imm8) \ - SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ - (((imm8) >> 0) & 1) + 0, \ - (((imm8) >> 1) & 1) + 4, \ - (((imm8) >> 2) & 1) + 2, \ - (((imm8) >> 3) & 1) + 6) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_pd - #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sqrt_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); - r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sqrt_ps - #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sqrt_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sqrt_pd(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); - r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sqrt_pd - #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_ps(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_ps - #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_pd(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_pd - #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_si256(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_si256 - #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_ps(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_ps - #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_pd(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_pd - #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { - #if 
defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_si256 - #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128(hi_addr, lo_addr, a); - #else - simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); - simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128 - #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128d(hi_addr, lo_addr, a); - #else - simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); - simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128d - #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128i(hi_addr, lo_addr, a); - #else - simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); - simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128i - #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_ps - #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_pd - #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_si256(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_si256 - #define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_ps - #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_ps(a, b); - #else - return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_ps - #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_pd - #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_pd(a, b); - #else - return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_pd - #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_undefined_ps (void) { - simde__m256_private r_; - -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || 
HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_ps(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256_to_private(simde_mm256_setzero_ps()); -#endif - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_ps - #define _mm256_undefined_ps() simde_mm256_undefined_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_undefined_pd (void) { - simde__m256d_private r_; - -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_pd(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); -#endif - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_pd - #define _mm256_undefined_pd() simde_mm256_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_undefined_si256 (void) { - simde__m256i_private r_; -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_si256(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_si256 - #define _mm256_undefined_si256() simde_mm256_undefined_si256() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_xor_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_ps - #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_xor_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] ^ b_.u64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_pd - #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_xorsign_ps(simde__m256 dest, 
simde__m256 src) { - return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { - return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_negate_ps(simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_negate_pd(simde__m256d a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpackhi_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - r_.f32[4] = a_.f32[6]; - r_.f32[5] = b_.f32[6]; - r_.f32[6] = a_.f32[7]; - r_.f32[7] = b_.f32[7]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_ps - #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpackhi_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); - #else - r_.f64[0] = a_.f64[1]; - r_.f64[1] = b_.f64[1]; - r_.f64[2] = a_.f64[3]; - r_.f64[3] = b_.f64[3]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_pd - #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpacklo_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - r_.f32[4] = a_.f32[4]; - r_.f32[5] = b_.f32[4]; - r_.f32[6] = a_.f32[5]; - r_.f32[7] = b_.f32[5]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_ps - #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpacklo_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); - #else - r_.f64[0] = a_.f64[0]; - r_.f64[1] = b_.f64[0]; - r_.f64[2] = a_.f64[2]; - r_.f64[3] = b_.f64[2]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_pd - #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_zextps128_ps256 (simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); - #else - simde__m256_private r_; - - r_.m128_private[0] = simde__m128_to_private(a); - r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_zextps128_ps256 - #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_zextpd128_pd256 (simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); - #else - simde__m256d_private r_; - - r_.m128d_private[0] = simde__m128d_to_private(a); - r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_zextpd128_pd256 - #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_zextsi128_si256 (simde__m128i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); - #else - simde__m256i_private r_; - - r_.m128i_private[0] = simde__m128i_to_private(a); - r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_zextsi128_si256 - #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testc_ps(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); - m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); - m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - return wasm_i32x4_extract_lane(m, 0); - #else - uint_fast32_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= ~a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_ps - #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_pd (simde__m128d a, simde__m128d b) { - 
#if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testc_pd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); - return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); - #else - uint_fast64_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= ~a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_pd - #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testc_ps(a, b); - #else - uint_fast32_t r = 0; - simde__m256_private - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= ~a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testc_ps - #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testc_pd(a, b); - #else - uint_fast64_t r = 0; - simde__m256d_private - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= ~a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testc_pd - #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testc_si256(a, b); - #else - int_fast32_t r = 0; - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= ~a_.i32f[i] & b_.i32f[i]; - } - - return HEDLEY_STATIC_CAST(int, !r); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testc_si256 - #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testz_ps(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); - m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); - m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - return wasm_i32x4_extract_lane(m, 0); - #else - uint_fast32_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_ps - #define 
_mm_testz_ps(a, b) simde_mm_testz_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testz_pd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); - return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); - #else - uint_fast64_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_pd - #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testz_ps(a, b); - #else - uint_fast32_t r = 0; - simde__m256_private - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testz_ps - #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testz_pd(a, b); - #else - uint_fast64_t r = 0; - simde__m256d_private - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testz_pd - #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testz_si256(a, b); - #else - int_fast32_t r = 0; - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= a_.i32f[i] & b_.i32f[i]; - } - - r = !r; - #endif - - return HEDLEY_STATIC_CAST(int, r); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testz_si256 - #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testnzc_ps(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); - v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); - m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); - m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); - m = 
wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); - #else - uint32_t rz = 0, rc = 0; - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - rc |= ~a_.u32[i] & b_.u32[i]; - rz |= a_.u32[i] & b_.u32[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_ps - #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testnzc_pd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); - v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); - return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) - & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); - #else - uint64_t rc = 0, rz = 0; - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - rc |= ~a_.u64[i] & b_.u64[i]; - rz |= a_.u64[i] & b_.u64[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_pd - #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testnzc_ps(a, b); - #else - uint32_t rc = 0, rz = 0; - simde__m256_private - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - rc |= ~a_.u32[i] & b_.u32[i]; - rz |= a_.u32[i] & b_.u32[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testnzc_ps - #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testnzc_pd(a, b); - #else - uint64_t rc = 0, rz = 0; - simde__m256d_private - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - rc |= ~a_.u64[i] & b_.u64[i]; - rz |= a_.u64[i] & b_.u64[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testnzc_pd - #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testnzc_si256(a, b); - #else - int32_t rc = 0, rz = 0; - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - rc |= ~a_.i32f[i] & b_.i32f[i]; - rz |= a_.i32f[i] & b_.i32f[i]; - } - - return !!(rc & rz); - #endif 
-} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testnzc_si256 - #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX_H) */ -/* :: End simde/x86/avx.h :: */ diff --git a/src/simde/x86/avx2.h b/src/simde/x86/avx2.h deleted file mode 100644 index 0478eaad5..000000000 --- a/src/simde/x86/avx2.h +++ /dev/null @@ -1,40181 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2018-2020 Evan Nemerson - * 2019-2020 Michael R. Crusoe - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX2_H) -#define SIMDE_X86_AVX2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2018-2020 Evan Nemerson - * 2020 Michael R. 
Crusoe - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - */ - -#if !defined(SIMDE_X86_SSE_H) -#define SIMDE_X86_SSE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/mmx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_MMX_H) -#define SIMDE_X86_MMX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-common.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_COMMON_H) -#define SIMDE_COMMON_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/hedley.h :: */ -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . 
- * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) -#if defined(HEDLEY_VERSION) -# undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 16 - -#if defined(HEDLEY_STRINGIFY_EX) -# undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -# undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -# undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(HEDLEY_CONCAT) -# undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) - -#if defined(HEDLEY_CONCAT3_EX) -# undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(HEDLEY_CONCAT3) -# undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(HEDLEY_VERSION_ENCODE) -# undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -# undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -# undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -# undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -# undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -# undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -# undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -# undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(HEDLEY_INTEL_VERSION) -# undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -# undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -# undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) -# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -# undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -# undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -# undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -# undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -# undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_ARM_VERSION) -# undef HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(HEDLEY_ARM_VERSION_CHECK) -# undef HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(HEDLEY_ARM_VERSION) -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IBM_VERSION) -# undef HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(HEDLEY_IBM_VERSION_CHECK) -# undef HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(HEDLEY_IBM_VERSION) -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_VERSION) -# undef HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -# if (__TI_COMPILER_VERSION__ >= 16000000) -# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -# endif -#endif - -#if defined(HEDLEY_TI_VERSION_CHECK) -# undef HEDLEY_TI_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_VERSION) -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION) -# undef HEDLEY_TI_CL2000_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) -# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) -# undef HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL2000_VERSION) -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION) -# undef HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) -# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
-# undef HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL430_VERSION) -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION) -# undef HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) -# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) -# undef HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_ARMCL_VERSION) -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION) -# undef HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) -# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) -# undef HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL6X_VERSION) -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION) -# undef HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) -# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) -# undef HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL7X_VERSION) -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION) -# undef HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) -# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) -# undef HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CLPRU_VERSION) -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_CRAY_VERSION) -# undef HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) -# if defined(_RELEASE_PATCHLEVEL) -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) -# else -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) -# endif -#endif - -#if defined(HEDLEY_CRAY_VERSION_CHECK) -# undef HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(HEDLEY_CRAY_VERSION) -# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IAR_VERSION) -# undef HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) -# if __VER__ > 1000 -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) -# else -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) -# endif -#endif - -#if defined(HEDLEY_IAR_VERSION_CHECK) -# undef HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(HEDLEY_IAR_VERSION) -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TINYC_VERSION) -# undef HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) -# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) -#endif - -#if defined(HEDLEY_TINYC_VERSION_CHECK) -# undef HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_DMC_VERSION) -# undef HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) -# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if defined(HEDLEY_DMC_VERSION_CHECK) -# undef HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(HEDLEY_DMC_VERSION) -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION) -# undef HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) -# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION_CHECK) -# undef HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(HEDLEY_COMPCERT_VERSION) -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PELLES_VERSION) -# undef HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) -# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if defined(HEDLEY_PELLES_VERSION_CHECK) -# undef HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(HEDLEY_PELLES_VERSION) -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION) -# undef HEDLEY_MCST_LCC_VERSION -#endif -#if defined(__LCC__) && defined(__LCC_MINOR__) -# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) -# undef HEDLEY_MCST_LCC_VERSION_CHECK -#endif -#if defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_GCC_VERSION) -# undef HEDLEY_GCC_VERSION 
-#endif -#if \ - defined(HEDLEY_GNUC_VERSION) && \ - !defined(__clang__) && \ - !defined(HEDLEY_INTEL_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_ARM_VERSION) && \ - !defined(HEDLEY_CRAY_VERSION) && \ - !defined(HEDLEY_TI_VERSION) && \ - !defined(HEDLEY_TI_ARMCL_VERSION) && \ - !defined(HEDLEY_TI_CL430_VERSION) && \ - !defined(HEDLEY_TI_CL2000_VERSION) && \ - !defined(HEDLEY_TI_CL6X_VERSION) && \ - !defined(HEDLEY_TI_CL7X_VERSION) && \ - !defined(HEDLEY_TI_CLPRU_VERSION) && \ - !defined(__COMPCERT__) && \ - !defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION -#endif - -#if defined(HEDLEY_GCC_VERSION_CHECK) -# undef HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(HEDLEY_GCC_VERSION) -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_HAS_ATTRIBUTE) -# undef HEDLEY_HAS_ATTRIBUTE -#endif -#if \ - defined(__has_attribute) && \ - ( \ - (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ - ) -# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if \ - defined(__has_cpp_attribute) && \ - defined(__cplusplus) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) -# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#elif \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_BUILTIN) -# undef HEDLEY_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) -#else -# define HEDLEY_HAS_BUILTIN(builtin) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_BUILTIN) -# undef HEDLEY_GNUC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_BUILTIN) -# undef HEDLEY_GCC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_FEATURE) -# undef HEDLEY_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) -#else -# define HEDLEY_HAS_FEATURE(feature) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_FEATURE) -# undef HEDLEY_GNUC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_FEATURE) -# undef HEDLEY_GCC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_EXTENSION) -# undef HEDLEY_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) -#else -# define HEDLEY_HAS_EXTENSION(extension) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_EXTENSION) -# undef HEDLEY_GNUC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_EXTENSION) -# undef HEDLEY_GCC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) -#else -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_WARNING) -# undef HEDLEY_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) -#else -# define HEDLEY_HAS_WARNING(warning) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_WARNING) -# undef HEDLEY_GNUC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_WARNING) -# undef HEDLEY_GCC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - defined(__clang__) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ - HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ - (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) -# define HEDLEY_PRAGMA(value) _Pragma(#value) -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_PRAGMA(value) __pragma(value) -#else -# define HEDLEY_PRAGMA(value) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_PUSH) -# undef HEDLEY_DIAGNOSTIC_PUSH -#endif -#if defined(HEDLEY_DIAGNOSTIC_POP) -# undef HEDLEY_DIAGNOSTIC_POP -#endif -#if defined(__clang__) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) -# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) -#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#else -# 
define HEDLEY_DIAGNOSTIC_PUSH -# define HEDLEY_DIAGNOSTIC_POP -#endif - -/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wc++98-compat") -# if HEDLEY_HAS_WARNING("-Wc++17-extensions") -# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# endif -#endif -#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x -#endif - -#if defined(HEDLEY_CONST_CAST) -# undef HEDLEY_CONST_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) -#elif \ - HEDLEY_HAS_WARNING("-Wcast-qual") || \ - HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_REINTERPRET_CAST) -# undef HEDLEY_REINTERPRET_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) -#else -# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_STATIC_CAST) -# undef HEDLEY_STATIC_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) -#else -# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_CPP_CAST) -# undef HEDLEY_CPP_CAST -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wold-style-cast") -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ - ((T) (expr)) \ - HEDLEY_DIAGNOSTIC_POP -# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("diag_suppress=Pe137") \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) -# endif -#else -# define HEDLEY_CPP_CAST(T, expr) (expr) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) -# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif -#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-attributes") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") -#elif \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif -#if HEDLEY_HAS_WARNING("-Wcast-qual") -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif -#if HEDLEY_HAS_WARNING("-Wunused-function") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif - -#if defined(HEDLEY_DEPRECATED) -# undef HEDLEY_DEPRECATED -#endif -#if defined(HEDLEY_DEPRECATED_FOR) -# undef HEDLEY_DEPRECATED_FOR -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) -# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) -#elif \ - (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) -#elif defined(__cplusplus) && (__cplusplus >= 201402L) -# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) -# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(deprecated) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") -# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") -#else -# define HEDLEY_DEPRECATED(since) -# define HEDLEY_DEPRECATED_FOR(since, replacement) -#endif - -#if defined(HEDLEY_UNAVAILABLE) -# undef HEDLEY_UNAVAILABLE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warning) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) -#else -# define HEDLEY_UNAVAILABLE(available_since) -#endif - -#if defined(HEDLEY_WARN_UNUSED_RESULT) -# undef HEDLEY_WARN_UNUSED_RESULT -#endif -#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) -# undef HEDLEY_WARN_UNUSED_RESULT_MSG -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) -#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#elif defined(_Check_return_) /* SAL */ -# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ -#else -# define HEDLEY_WARN_UNUSED_RESULT -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) -#endif - -#if defined(HEDLEY_SENTINEL) -# undef HEDLEY_SENTINEL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(sentinel) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) -#else -# define HEDLEY_SENTINEL(position) -#endif - -#if defined(HEDLEY_NO_RETURN) -# undef HEDLEY_NO_RETURN -#endif -#if HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NO_RETURN __noreturn -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -# define HEDLEY_NO_RETURN _Noreturn -#elif defined(__cplusplus) && (__cplusplus >= 201103L) -# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(noreturn) || \ - HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_NO_RETURN _Pragma("does_not_return") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NO_RETURN __attribute((noreturn)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#else -# define HEDLEY_NO_RETURN -#endif - -#if defined(HEDLEY_NO_ESCAPE) -# undef HEDLEY_NO_ESCAPE -#endif -#if HEDLEY_HAS_ATTRIBUTE(noescape) -# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) -#else -# define HEDLEY_NO_ESCAPE -#endif - -#if defined(HEDLEY_UNREACHABLE) -# undef HEDLEY_UNREACHABLE -#endif -#if defined(HEDLEY_UNREACHABLE_RETURN) -# undef HEDLEY_UNREACHABLE_RETURN -#endif -#if defined(HEDLEY_ASSUME) -# undef HEDLEY_ASSUME -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ASSUME(expr) __assume(expr) -#elif HEDLEY_HAS_BUILTIN(__builtin_assume) -# define HEDLEY_ASSUME(expr) __builtin_assume(expr) -#elif \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# if defined(__cplusplus) -# define HEDLEY_ASSUME(expr) std::_nassert(expr) -# else -# define HEDLEY_ASSUME(expr) _nassert(expr) -# endif -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNREACHABLE() __builtin_unreachable() -#elif defined(HEDLEY_ASSUME) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif -#if !defined(HEDLEY_ASSUME) -# if defined(HEDLEY_UNREACHABLE) -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) -# else -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) -# endif -#endif -#if defined(HEDLEY_UNREACHABLE) -# if \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) -# else -# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() -# endif -#else -# define HEDLEY_UNREACHABLE_RETURN(value) return (value) -#endif -#if !defined(HEDLEY_UNREACHABLE) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wpedantic") -# pragma clang diagnostic ignored "-Wpedantic" -#endif -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif -#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) -# if defined(__clang__) -# pragma clang diagnostic ignored "-Wvariadic-macros" -# elif defined(HEDLEY_GCC_VERSION) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif -#endif -#if defined(HEDLEY_NON_NULL) -# undef HEDLEY_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) -#else -# define HEDLEY_NON_NULL(...) 
-#endif -HEDLEY_DIAGNOSTIC_POP - -#if defined(HEDLEY_PRINTF_FORMAT) -# undef HEDLEY_PRINTF_FORMAT -#endif -#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) -#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) -#elif \ - HEDLEY_HAS_ATTRIBUTE(format) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) -#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) -#else -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) -#endif - -#if defined(HEDLEY_CONSTEXPR) -# undef HEDLEY_CONSTEXPR -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) -# endif -#endif -#if !defined(HEDLEY_CONSTEXPR) -# define HEDLEY_CONSTEXPR -#endif - -#if defined(HEDLEY_PREDICT) -# undef HEDLEY_PREDICT -#endif -#if defined(HEDLEY_LIKELY) -# undef HEDLEY_LIKELY -#endif -#if defined(HEDLEY_UNLIKELY) -# undef HEDLEY_UNLIKELY -#endif -#if defined(HEDLEY_UNPREDICTABLE) -# undef HEDLEY_UNPREDICTABLE -#endif -#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) -# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) -# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) -# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) -# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) -# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) -#elif \ - (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, expected, probability) \ - (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) -# define HEDLEY_PREDICT_TRUE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ - })) -# define HEDLEY_PREDICT_FALSE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ - })) -# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) -# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) -#else -# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) -# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) -# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) -# define HEDLEY_LIKELY(expr) (!!(expr)) -# define HEDLEY_UNLIKELY(expr) (!!(expr)) -#endif -#if !defined(HEDLEY_UNPREDICTABLE) -# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) -#endif - -#if defined(HEDLEY_MALLOC) -# undef HEDLEY_MALLOC -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(malloc) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_MALLOC __attribute__((__malloc__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_MALLOC _Pragma("returns_new_memory") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_MALLOC __declspec(restrict) -#else -# define HEDLEY_MALLOC -#endif - -#if defined(HEDLEY_PURE) -# undef HEDLEY_PURE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(pure) || \ - HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PURE __attribute__((__pure__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_PURE _Pragma("does_not_write_global_data") -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ - ) -# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") -#else -# define HEDLEY_PURE -#endif - -#if defined(HEDLEY_CONST) -# undef HEDLEY_CONST -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(const) || \ - HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_CONST __attribute__((__const__)) -#elif \ - HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_CONST _Pragma("no_side_effect") -#else -# define HEDLEY_CONST HEDLEY_PURE -#endif - -#if defined(HEDLEY_RESTRICT) -# undef HEDLEY_RESTRICT -#endif -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) -# define HEDLEY_RESTRICT restrict -#elif \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - defined(__clang__) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RESTRICT __restrict -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) -# define HEDLEY_RESTRICT _Restrict -#else -# define HEDLEY_RESTRICT -#endif - -#if defined(HEDLEY_INLINE) -# undef HEDLEY_INLINE -#endif -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - (defined(__cplusplus) && (__cplusplus >= 199711L)) -# define HEDLEY_INLINE inline -#elif \ - defined(HEDLEY_GCC_VERSION) || \ - HEDLEY_ARM_VERSION_CHECK(6,2,0) -# define HEDLEY_INLINE __inline__ -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_INLINE __inline -#else -# define HEDLEY_INLINE -#endif - -#if defined(HEDLEY_ALWAYS_INLINE) -# undef HEDLEY_ALWAYS_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(always_inline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ALWAYS_INLINE __forceinline -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ - ) -# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") -#else -# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE -#endif - -#if defined(HEDLEY_NEVER_INLINE) -# undef HEDLEY_NEVER_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(noinline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) -# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NEVER_INLINE _Pragma("inline=never") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NEVER_INLINE __attribute((noinline)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#else -# define HEDLEY_NEVER_INLINE -#endif - -#if defined(HEDLEY_PRIVATE) -# undef HEDLEY_PRIVATE -#endif -#if defined(HEDLEY_PUBLIC) -# undef HEDLEY_PUBLIC -#endif -#if defined(HEDLEY_IMPORT) -# undef HEDLEY_IMPORT -#endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC __declspec(dllexport) -# define HEDLEY_IMPORT __declspec(dllimport) -#else -# if \ - HEDLEY_HAS_ATTRIBUTE(visibility) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - ( \ - defined(__TI_EABI__) && \ - ( \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ - ) \ - ) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) -# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) -# else -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC -# endif -# define HEDLEY_IMPORT extern -#endif - -#if defined(HEDLEY_NO_THROW) -# undef HEDLEY_NO_THROW -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nothrow) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_THROW __attribute__((__nothrow__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NO_THROW __declspec(nothrow) -#else -# define HEDLEY_NO_THROW -#endif - -#if defined(HEDLEY_FALL_THROUGH) -# undef HEDLEY_FALL_THROUGH -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_FALL_THROUGH -#elif \ - HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) -#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) -#elif defined(__fallthrough) /* SAL */ -# define HEDLEY_FALL_THROUGH __fallthrough -#else -# define HEDLEY_FALL_THROUGH -#endif - -#if defined(HEDLEY_RETURNS_NON_NULL) -# undef HEDLEY_RETURNS_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) -#elif defined(_Ret_notnull_) /* SAL */ -# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ -#else -# define HEDLEY_RETURNS_NON_NULL -#endif - -#if defined(HEDLEY_ARRAY_PARAM) -# undef HEDLEY_ARRAY_PARAM -#endif -#if \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - !defined(__STDC_NO_VLA__) && \ - !defined(__cplusplus) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_ARRAY_PARAM(name) (name) -#else -# define 
HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(HEDLEY_IS_CONSTANT) -# undef HEDLEY_IS_CONSTANT -#endif -#if defined(HEDLEY_REQUIRE_CONSTEXPR) -# undef HEDLEY_REQUIRE_CONSTEXPR -#endif -/* HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_IS_CONSTEXPR_) -# undef HEDLEY_IS_CONSTEXPR_ -#endif -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -# if \ - HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) -# endif -# elif \ - ( \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(HEDLEY_SUNPRO_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION)) || \ - (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) -# endif -# elif \ - defined(HEDLEY_GCC_VERSION) || \ - defined(HEDLEY_INTEL_VERSION) || \ - defined(HEDLEY_TINYC_VERSION) || \ - defined(HEDLEY_TI_ARMCL_VERSION) || \ - HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ - defined(HEDLEY_TI_CL2000_VERSION) || \ - defined(HEDLEY_TI_CL6X_VERSION) || \ - defined(HEDLEY_TI_CL7X_VERSION) || \ - defined(HEDLEY_TI_CLPRU_VERSION) || \ - defined(__clang__) -# define HEDLEY_IS_CONSTEXPR_(expr) ( \ - sizeof(void) != \ - sizeof(*( \ - 1 ? \ - ((void*) ((expr) * 0L) ) : \ - ((struct { char v[sizeof(void) * 2]; } *) 1) \ - ) \ - ) \ - ) -# endif -#endif -#if defined(HEDLEY_IS_CONSTEXPR_) -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) -#else -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) (0) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(HEDLEY_BEGIN_C_DECLS) -# undef HEDLEY_BEGIN_C_DECLS -#endif -#if defined(HEDLEY_END_C_DECLS) -# undef HEDLEY_END_C_DECLS -#endif -#if defined(HEDLEY_C_DECL) -# undef HEDLEY_C_DECL -#endif -#if defined(__cplusplus) -# define HEDLEY_BEGIN_C_DECLS extern "C" { -# define HEDLEY_END_C_DECLS } -# define HEDLEY_C_DECL extern "C" -#else -# define HEDLEY_BEGIN_C_DECLS -# define HEDLEY_END_C_DECLS -# define HEDLEY_C_DECL -#endif - -#if defined(HEDLEY_STATIC_ASSERT) -# undef HEDLEY_STATIC_ASSERT -#endif -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) -# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#else -# define HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(HEDLEY_NULL) -# undef HEDLEY_NULL -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) -# elif defined(NULL) -# define HEDLEY_NULL NULL -# else -# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) -# endif -#elif defined(NULL) -# define HEDLEY_NULL NULL -#else -# define HEDLEY_NULL ((void*) 0) -#endif - -#if defined(HEDLEY_MESSAGE) -# undef HEDLEY_MESSAGE -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_MESSAGE(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(message msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_WARNING) -# undef HEDLEY_WARNING -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_WARNING(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(clang warning msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_REQUIRE) -# undef HEDLEY_REQUIRE -#endif -#if defined(HEDLEY_REQUIRE_MSG) -# undef HEDLEY_REQUIRE_MSG -#endif -#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) -# if HEDLEY_HAS_WARNING("-Wgcc-compat") -# define HEDLEY_REQUIRE(expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# define HEDLEY_REQUIRE_MSG(expr,msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) -# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) -# endif -#else -# define HEDLEY_REQUIRE(expr) -# define HEDLEY_REQUIRE_MSG(expr,msg) -#endif - -#if defined(HEDLEY_FLAGS) -# undef HEDLEY_FLAGS -#endif -#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) -# define HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else -# define HEDLEY_FLAGS -#endif - -#if defined(HEDLEY_FLAGS_CAST) -# undef HEDLEY_FLAGS_CAST -#endif -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) -# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)") \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(HEDLEY_EMPTY_BASES) -# undef HEDLEY_EMPTY_BASES -#endif -#if \ - (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else -# define HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. */ - -#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) -# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) -#else -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_BUILTIN) -# undef HEDLEY_CLANG_HAS_BUILTIN -#endif -#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) - -#if defined(HEDLEY_CLANG_HAS_FEATURE) -# undef HEDLEY_CLANG_HAS_FEATURE -#endif -#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) - -#if defined(HEDLEY_CLANG_HAS_EXTENSION) -# undef HEDLEY_CLANG_HAS_EXTENSION -#endif -#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) - -#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_WARNING) -# undef HEDLEY_CLANG_HAS_WARNING -#endif -#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ -/* :: End simde/hedley.h :: */ - -#define SIMDE_VERSION_MAJOR 0 -#define SIMDE_VERSION_MINOR 8 -#define SIMDE_VERSION_MICRO 0 -#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) -// Also update meson.build in the root directory of the repository - -#include -#include - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin 
simde/simde-detect-clang.h :: */ -/* Detect Clang Version - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -/* This file was originally part of SIMDe - * (). You're free to do with it as - * you please, but I do have a few small requests: - * - * * If you make improvements, please submit them back to SIMDe - * (at ) so others can - * benefit from them. - * * Please keep a link to SIMDe intact so people know where to submit - * improvements. - * * If you expose it publicly, please change the SIMDE_ prefix to - * something specific to your project. - * - * The version numbers clang exposes (in the ___clang_major__, - * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. - * Vendors such as Apple will define these values to their version - * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but - * __clang_major__ and __clang_minor__ are defined to 4 and 0 - * respectively, instead of 3 and 1. - * - * The solution is *usually* to use clang's feature detection macros - * () - * to determine if the feature you're interested in is available. This - * generally works well, and it should probably be the first thing you - * try. Unfortunately, it's not possible to check for everything. In - * particular, compiler bugs. - * - * This file just uses the feature checking macros to detect features - * added in specific versions of clang to identify which version of - * clang the compiler is based on. - * - * Right now it only goes back to 3.6, but I'm happy to accept patches - * to go back further. And, of course, newer versions are welcome if - * they're not already present, and if you find a way to detect a point - * release that would be great, too! - */ - -#if !defined(SIMDE_DETECT_CLANG_H) -#define SIMDE_DETECT_CLANG_H 1 - -/* Attempt to detect the upstream clang version number. I usually only - * worry about major version numbers (at least for 4.0+), but if you - * need more resolution I'm happy to accept patches that are able to - * detect minor versions as well. That said, you'll probably have a - * hard time with detection since AFAIK most minor releases don't add - * anything we can detect. Updated based on - * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 - * - would welcome patches/updates there as well. 
- */ - -#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -# if __has_attribute(unsafe_buffer_usage) // no new warnings in 17.0 -# define SIMDE_DETECT_CLANG_VERSION 170000 -# elif __has_attribute(nouwtable) // no new warnings in 16.0 -# define SIMDE_DETECT_CLANG_VERSION 160000 -# elif __has_warning("-Warray-parameter") -# define SIMDE_DETECT_CLANG_VERSION 150000 -# elif __has_warning("-Wbitwise-instead-of-logical") -# define SIMDE_DETECT_CLANG_VERSION 140000 -# elif __has_warning("-Waix-compat") -# define SIMDE_DETECT_CLANG_VERSION 130000 -# elif __has_warning("-Wformat-insufficient-args") -# define SIMDE_DETECT_CLANG_VERSION 120000 -# elif __has_warning("-Wimplicit-const-int-float-conversion") -# define SIMDE_DETECT_CLANG_VERSION 110000 -# elif __has_warning("-Wmisleading-indentation") -# define SIMDE_DETECT_CLANG_VERSION 100000 -# elif defined(__FILE_NAME__) -# define SIMDE_DETECT_CLANG_VERSION 90000 -# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) -# define SIMDE_DETECT_CLANG_VERSION 80000 -// For reasons unknown, Xcode 10.3 (Apple LLVM version 10.0.1) is apparently -// based on Clang 7, but does not support the warning we test. -// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and -// https://trac.macports.org/wiki/XcodeVersionInfo. -# elif __has_warning("-Wc++98-compat-extra-semi") || \ - (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) -# define SIMDE_DETECT_CLANG_VERSION 70000 -# elif __has_warning("-Wpragma-pack") -# define SIMDE_DETECT_CLANG_VERSION 60000 -# elif __has_warning("-Wbitfield-enum-conversion") -# define SIMDE_DETECT_CLANG_VERSION 50000 -# elif __has_attribute(diagnose_if) -# define SIMDE_DETECT_CLANG_VERSION 40000 -# elif __has_warning("-Wcomma") -# define SIMDE_DETECT_CLANG_VERSION 39000 -# elif __has_warning("-Wdouble-promotion") -# define SIMDE_DETECT_CLANG_VERSION 38000 -# elif __has_warning("-Wshift-negative-value") -# define SIMDE_DETECT_CLANG_VERSION 37000 -# elif __has_warning("-Wambiguous-ellipsis") -# define SIMDE_DETECT_CLANG_VERSION 36000 -# else -# define SIMDE_DETECT_CLANG_VERSION 1 -# endif -#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ - -/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty - * straightforward; it returns true if the compiler is a derivative - * of clang >= the specified version. - * - * Since this file is often (primarily?) useful for working around bugs - * it is also helpful to have a macro which returns true if only if the - * compiler is a version of clang *older* than the specified version to - * make it a bit easier to ifdef regions to add code for older versions, - * such as pragmas to disable a specific warning. 
*/ - -#if defined(SIMDE_DETECT_CLANG_VERSION) -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) -#else -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) -#endif - -#endif /* !defined(SIMDE_DETECT_CLANG_H) */ -/* :: End simde/simde-detect-clang.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-arch.h :: */ -/* Architecture detection - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - * Different compilers define different preprocessor macros for the - * same architecture. This is an attempt to provide a single - * interface which is usable on any compiler. - * - * In general, a macro named SIMDE_ARCH_* is defined for each - * architecture the CPU supports. When there are multiple possible - * versions, we try to define the macro to the target version. For - * example, if you want to check for i586+, you could do something - * like: - * - * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) - * ... - * #endif - * - * You could also just check that SIMDE_ARCH_X86 >= 5 without checking - * if it's defined first, but some compilers may emit a warning about - * an undefined macro being used (e.g., GCC with -Wundef). - * - * This was originally created for SIMDe - * (hence the prefix), but this - * header has no dependencies and may be used anywhere. It is - * originally based on information from - * , though it - * has been enhanced with additional information. - * - * If you improve this file, or find a bug, please file the issue at - * . If you copy this into - * your project, even if you change the prefix, please keep the links - * to SIMDe intact so others know where to report issues, submit - * enhancements, and find the latest version. 
*/ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -# if defined(__alpha_ev6__) -# define SIMDE_ARCH_ALPHA 6 -# elif defined(__alpha_ev5__) -# define SIMDE_ARCH_ALPHA 5 -# elif defined(__alpha_ev4__) -# define SIMDE_ARCH_ALPHA 4 -# else -# define SIMDE_ARCH_ALPHA 1 -# endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -# define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -# define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -# if !defined(_M_ARM64EC) -# define SIMDE_ARCH_AMD64 1000 -# endif -#endif - -/* ARM - */ -#if defined(__ARM_ARCH) -# if __ARM_ARCH > 100 -# define SIMDE_ARCH_ARM (__ARM_ARCH) -# else -# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) -# endif -#elif defined(_M_ARM) -# if _M_ARM > 100 -# define SIMDE_ARCH_ARM (_M_ARM) -# else -# define SIMDE_ARCH_ARM (_M_ARM * 100) -# endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_ARM 800 -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -# define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) -#else -# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -# define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -# elif defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -# endif -#endif -#if defined(__ARM_FEATURE_SVE) -# define SIMDE_ARCH_ARM_SVE -#endif -#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA -# define SIMDE_ARCH_ARM_FMA -#endif -#if defined(__ARM_FEATURE_CRYPTO) -# define SIMDE_ARCH_ARM_CRYPTO -#endif -#if defined(__ARM_FEATURE_QRDMX) -# define SIMDE_ARCH_ARM_QRDMX -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -# define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -# define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) -# define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -# define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -# define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -# define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -# define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -# define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -# define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -# define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -# define 
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
[The remainder of this file-removal hunk deletes the bundled third-party SIMDe headers verbatim: the tail of simde/simde-arch.h (TMS320, WebAssembly, Xtensa, LoongArch, and ARM FP16/BF16 feature detection), simde/simde-features.h with its inlined simde/simde-diagnostic.h (compiler diagnostic-suppression macros, x86/ARM/POWER/z/Arch/MIPS/LoongArch native and no-native feature mapping, natural vector sizes, and native-alias switches), and the opening of simde/simde-math.h (SLEEF and libm/cmath detection, math constants, and floating-point classification helpers). All of these are auto-generated, MIT-licensed vendor files marked "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY"; the hunk contains deletions only, with no replacement lines.]
nextafter(x, y) - #endif -#endif - -#if !defined(simde_math_nextafterf) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafterf(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafterf(x, y) nextafterf(x, y) - #endif -#endif - -/*** Functions from C99 ***/ - -#if !defined(simde_math_abs) - #if SIMDE_MATH_BUILTIN_LIBM(abs) - #define simde_math_abs(v) __builtin_abs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_abs(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_abs(v) abs(v) - #endif -#endif - -#if !defined(simde_math_labs) - #if SIMDE_MATH_BUILTIN_LIBM(labs) - #define simde_math_labs(v) __builtin_labs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_labs(v) std::labs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_labs(v) labs(v) - #endif -#endif - -#if !defined(simde_math_llabs) - #if SIMDE_MATH_BUILTIN_LIBM(llabs) - #define simde_math_llabs(v) __builtin_llabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_llabs(v) std::llabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_llabs(v) llabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_acos) - #if SIMDE_MATH_BUILTIN_LIBM(acos) - #define simde_math_acos(v) __builtin_acos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acos(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acos(v) acos(v) - #endif -#endif - -#if !defined(simde_math_acosf) - #if SIMDE_MATH_BUILTIN_LIBM(acosf) - #define simde_math_acosf(v) __builtin_acosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosf(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosf(v) acosf(v) - #endif -#endif - -#if !defined(simde_math_acosh) - #if SIMDE_MATH_BUILTIN_LIBM(acosh) - #define simde_math_acosh(v) __builtin_acosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosh(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosh(v) acosh(v) - #endif -#endif - -#if !defined(simde_math_acoshf) - #if SIMDE_MATH_BUILTIN_LIBM(acoshf) - #define simde_math_acoshf(v) __builtin_acoshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acoshf(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acoshf(v) acoshf(v) - #endif -#endif - -#if !defined(simde_math_asin) - #if SIMDE_MATH_BUILTIN_LIBM(asin) - #define simde_math_asin(v) __builtin_asin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asin(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asin(v) asin(v) - #endif -#endif - -#if !defined(simde_math_asinf) - #if SIMDE_MATH_BUILTIN_LIBM(asinf) - #define simde_math_asinf(v) __builtin_asinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinf(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinf(v) asinf(v) - #endif -#endif - -#if 
!defined(simde_math_asinh) - #if SIMDE_MATH_BUILTIN_LIBM(asinh) - #define simde_math_asinh(v) __builtin_asinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinh(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinh(v) asinh(v) - #endif -#endif - -#if !defined(simde_math_asinhf) - #if SIMDE_MATH_BUILTIN_LIBM(asinhf) - #define simde_math_asinhf(v) __builtin_asinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinhf(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinhf(v) asinhf(v) - #endif -#endif - -#if !defined(simde_math_atan) - #if SIMDE_MATH_BUILTIN_LIBM(atan) - #define simde_math_atan(v) __builtin_atan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan(v) atan(v) - #endif -#endif - -#if !defined(simde_math_atan2) - #if SIMDE_MATH_BUILTIN_LIBM(atan2) - #define simde_math_atan2(y, x) __builtin_atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2(y, x) atan2(y, x) - #endif -#endif - -#if !defined(simde_math_atan2f) - #if SIMDE_MATH_BUILTIN_LIBM(atan2f) - #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2f(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2f(y, x) atan2f(y, x) - #endif -#endif - -#if !defined(simde_math_atanf) - #if SIMDE_MATH_BUILTIN_LIBM(atanf) - #define simde_math_atanf(v) __builtin_atanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanf(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanf(v) atanf(v) - #endif -#endif - -#if !defined(simde_math_atanh) - #if SIMDE_MATH_BUILTIN_LIBM(atanh) - #define simde_math_atanh(v) __builtin_atanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanh(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanh(v) atanh(v) - #endif -#endif - -#if !defined(simde_math_atanhf) - #if SIMDE_MATH_BUILTIN_LIBM(atanhf) - #define simde_math_atanhf(v) __builtin_atanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanhf(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanhf(v) atanhf(v) - #endif -#endif - -#if !defined(simde_math_cbrt) - #if SIMDE_MATH_BUILTIN_LIBM(cbrt) - #define simde_math_cbrt(v) __builtin_cbrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrt(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrt(v) cbrt(v) - #endif -#endif - -#if !defined(simde_math_cbrtf) - #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) - #define simde_math_cbrtf(v) __builtin_cbrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrtf(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrtf(v) cbrtf(v) - #endif -#endif - -#if !defined(simde_math_ceil) - #if SIMDE_MATH_BUILTIN_LIBM(ceil) - #define simde_math_ceil(v) __builtin_ceil(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceil(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_ceil(v) ceil(v) - #endif -#endif - -#if !defined(simde_math_ceilf) - #if SIMDE_MATH_BUILTIN_LIBM(ceilf) - #define simde_math_ceilf(v) __builtin_ceilf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceilf(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) 
- #define simde_math_ceilf(v) ceilf(v) - #endif -#endif - -#if !defined(simde_math_copysign) - #if SIMDE_MATH_BUILTIN_LIBM(copysign) - #define simde_math_copysign(x, y) __builtin_copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysign(x, y) std::copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysign(x, y) copysign(x, y) - #endif -#endif - -#if !defined(simde_math_copysignf) - #if SIMDE_MATH_BUILTIN_LIBM(copysignf) - #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysignf(x, y) std::copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysignf(x, y) copysignf(x, y) - #endif -#endif - -#if !defined(simde_math_signbit) - #if SIMDE_MATH_BUILTIN_LIBM(signbit) - #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - #define simde_math_signbit(x) __builtin_signbit(x) - #else - #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) - #endif - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_signbit(x) std::signbit(x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_signbit(x) signbit(x) - #endif -#endif - -#if !defined(simde_math_cos) - #if SIMDE_MATH_BUILTIN_LIBM(cos) - #define simde_math_cos(v) __builtin_cos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cos(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cos(v) cos(v) - #endif -#endif - -#if !defined(simde_math_cosf) - #if defined(SIMDE_MATH_SLEEF_ENABLE) - #if SIMDE_ACCURACY_PREFERENCE < 1 - #define simde_math_cosf(v) Sleef_cosf_u35(v) - #else - #define simde_math_cosf(v) Sleef_cosf_u10(v) - #endif - #elif SIMDE_MATH_BUILTIN_LIBM(cosf) - #define simde_math_cosf(v) __builtin_cosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosf(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosf(v) cosf(v) - #endif -#endif - -#if !defined(simde_math_cosh) - #if SIMDE_MATH_BUILTIN_LIBM(cosh) - #define simde_math_cosh(v) __builtin_cosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosh(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosh(v) cosh(v) - #endif -#endif - -#if !defined(simde_math_coshf) - #if SIMDE_MATH_BUILTIN_LIBM(coshf) - #define simde_math_coshf(v) __builtin_coshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_coshf(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_coshf(v) coshf(v) - #endif -#endif - -#if !defined(simde_math_erf) - #if SIMDE_MATH_BUILTIN_LIBM(erf) - #define simde_math_erf(v) __builtin_erf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erf(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erf(v) erf(v) - #endif -#endif - -#if !defined(simde_math_erff) - #if SIMDE_MATH_BUILTIN_LIBM(erff) - #define simde_math_erff(v) __builtin_erff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erff(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erff(v) erff(v) - #endif -#endif - -#if !defined(simde_math_erfc) - #if SIMDE_MATH_BUILTIN_LIBM(erfc) - #define simde_math_erfc(v) __builtin_erfc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfc(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfc(v) erfc(v) - #endif -#endif - -#if !defined(simde_math_erfcf) - #if SIMDE_MATH_BUILTIN_LIBM(erfcf) - #define simde_math_erfcf(v) 
__builtin_erfcf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfcf(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfcf(v) erfcf(v) - #endif -#endif - -#if !defined(simde_math_exp) - #if SIMDE_MATH_BUILTIN_LIBM(exp) - #define simde_math_exp(v) __builtin_exp(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp(v) exp(v) - #endif -#endif - -#if !defined(simde_math_expf) - #if SIMDE_MATH_BUILTIN_LIBM(expf) - #define simde_math_expf(v) __builtin_expf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expf(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expf(v) expf(v) - #endif -#endif - -#if !defined(simde_math_expm1) - #if SIMDE_MATH_BUILTIN_LIBM(expm1) - #define simde_math_expm1(v) __builtin_expm1(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1(v) expm1(v) - #endif -#endif - -#if !defined(simde_math_expm1f) - #if SIMDE_MATH_BUILTIN_LIBM(expm1f) - #define simde_math_expm1f(v) __builtin_expm1f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1f(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1f(v) expm1f(v) - #endif -#endif - -#if !defined(simde_math_exp2) - #if SIMDE_MATH_BUILTIN_LIBM(exp2) - #define simde_math_exp2(v) __builtin_exp2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2(v) exp2(v) - #endif -#endif - -#if !defined(simde_math_exp2f) - #if SIMDE_MATH_BUILTIN_LIBM(exp2f) - #define simde_math_exp2f(v) __builtin_exp2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2f(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2f(v) exp2f(v) - #endif -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10(v) __builtin_exp10(v) -#else -# define simde_math_exp10(v) pow(10.0, (v)) -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10f(v) __builtin_exp10f(v) -#else -# define simde_math_exp10f(v) powf(10.0f, (v)) -#endif - -#if !defined(simde_math_fabs) - #if SIMDE_MATH_BUILTIN_LIBM(fabs) - #define simde_math_fabs(v) __builtin_fabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabs(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabs(v) fabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_floor) - #if SIMDE_MATH_BUILTIN_LIBM(floor) - #define simde_math_floor(v) __builtin_floor(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floor(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floor(v) floor(v) - #endif -#endif - -#if !defined(simde_math_floorf) - #if SIMDE_MATH_BUILTIN_LIBM(floorf) - #define simde_math_floorf(v) __builtin_floorf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floorf(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floorf(v) floorf(v) - #endif -#endif - -#if 
!defined(simde_math_fma) - #if SIMDE_MATH_BUILTIN_LIBM(fma) - #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fma(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fma(x, y, z) fma(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmaf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaf) - #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaf(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaf(x, y, z) fmaf(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmax) - #if SIMDE_MATH_BUILTIN_LIBM(fmax) - #define simde_math_fmax(x, y) __builtin_fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmax(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmax(x, y) fmax(x, y) - #endif -#endif - -#if !defined(simde_math_fmaxf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) - #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaxf(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaxf(x, y) fmaxf(x, y) - #endif -#endif - -#if !defined(simde_math_hypot) - #if SIMDE_MATH_BUILTIN_LIBM(hypot) - #define simde_math_hypot(y, x) __builtin_hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypot(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypot(y, x) hypot(y, x) - #endif -#endif - -#if !defined(simde_math_hypotf) - #if SIMDE_MATH_BUILTIN_LIBM(hypotf) - #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypotf(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypotf(y, x) hypotf(y, x) - #endif -#endif - -#if !defined(simde_math_log) - #if SIMDE_MATH_BUILTIN_LIBM(log) - #define simde_math_log(v) __builtin_log(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log(v) log(v) - #endif -#endif - -#if !defined(simde_math_logf) - #if SIMDE_MATH_BUILTIN_LIBM(logf) - #define simde_math_logf(v) __builtin_logf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logf(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logf(v) logf(v) - #endif -#endif - -#if !defined(simde_math_logb) - #if SIMDE_MATH_BUILTIN_LIBM(logb) - #define simde_math_logb(v) __builtin_logb(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logb(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logb(v) logb(v) - #endif -#endif - -#if !defined(simde_math_logbf) - #if SIMDE_MATH_BUILTIN_LIBM(logbf) - #define simde_math_logbf(v) __builtin_logbf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logbf(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logbf(v) logbf(v) - #endif -#endif - -#if !defined(simde_math_log1p) - #if SIMDE_MATH_BUILTIN_LIBM(log1p) - #define simde_math_log1p(v) __builtin_log1p(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log1p(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1p(v) log1p(v) - #endif -#endif - -#if !defined(simde_math_log1pf) - #if SIMDE_MATH_BUILTIN_LIBM(log1pf) - #define simde_math_log1pf(v) __builtin_log1pf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define 
simde_math_log1pf(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1pf(v) log1pf(v) - #endif -#endif - -#if !defined(simde_math_log2) - #if SIMDE_MATH_BUILTIN_LIBM(log2) - #define simde_math_log2(v) __builtin_log2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2(v) log2(v) - #endif -#endif - -#if !defined(simde_math_log2f) - #if SIMDE_MATH_BUILTIN_LIBM(log2f) - #define simde_math_log2f(v) __builtin_log2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2f(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2f(v) log2f(v) - #endif -#endif - -#if !defined(simde_math_log10) - #if SIMDE_MATH_BUILTIN_LIBM(log10) - #define simde_math_log10(v) __builtin_log10(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10(v) log10(v) - #endif -#endif - -#if !defined(simde_math_log10f) - #if SIMDE_MATH_BUILTIN_LIBM(log10f) - #define simde_math_log10f(v) __builtin_log10f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10f(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10f(v) log10f(v) - #endif -#endif - -#if !defined(simde_math_modf) - #if SIMDE_MATH_BUILTIN_LIBM(modf) - #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modf(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modf(x, iptr) modf(x, iptr) - #endif -#endif - -#if !defined(simde_math_modff) - #if SIMDE_MATH_BUILTIN_LIBM(modff) - #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modff(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modff(x, iptr) modff(x, iptr) - #endif -#endif - -#if !defined(simde_math_nearbyint) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) - #define simde_math_nearbyint(v) __builtin_nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyint(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyint(v) nearbyint(v) - #endif -#endif - -#if !defined(simde_math_nearbyintf) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) - #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyintf(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyintf(v) nearbyintf(v) - #endif -#endif - -#if !defined(simde_math_pow) - #if SIMDE_MATH_BUILTIN_LIBM(pow) - #define simde_math_pow(y, x) __builtin_pow(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_pow(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_pow(y, x) pow(y, x) - #endif -#endif - -#if !defined(simde_math_powf) - #if SIMDE_MATH_BUILTIN_LIBM(powf) - #define simde_math_powf(y, x) __builtin_powf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_powf(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_powf(y, x) powf(y, x) - #endif -#endif - -#if !defined(simde_math_rint) - #if SIMDE_MATH_BUILTIN_LIBM(rint) - #define simde_math_rint(v) __builtin_rint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rint(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rint(v) rint(v) - #endif 
-#endif - -#if !defined(simde_math_rintf) - #if SIMDE_MATH_BUILTIN_LIBM(rintf) - #define simde_math_rintf(v) __builtin_rintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rintf(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rintf(v) rintf(v) - #endif -#endif - -#if !defined(simde_math_round) - #if SIMDE_MATH_BUILTIN_LIBM(round) - #define simde_math_round(v) __builtin_round(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_round(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_round(v) round(v) - #endif -#endif - -#if !defined(simde_math_roundf) - #if SIMDE_MATH_BUILTIN_LIBM(roundf) - #define simde_math_roundf(v) __builtin_roundf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_roundf(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_roundf(v) roundf(v) - #endif -#endif - -#if !defined(simde_math_roundeven) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundeven(v) __builtin_roundeven(v) - #elif defined(simde_math_round) && defined(simde_math_fabs) - static HEDLEY_INLINE - double - simde_math_roundeven(double v) { - double rounded = simde_math_round(v); - double diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundeven simde_math_roundeven - #endif -#endif - -#if !defined(simde_math_roundevenf) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundevenf(v) __builtin_roundevenf(v) - #elif defined(simde_math_roundf) && defined(simde_math_fabsf) - static HEDLEY_INLINE - float - simde_math_roundevenf(float v) { - float rounded = simde_math_roundf(v); - float diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundevenf simde_math_roundevenf - #endif -#endif - -#if !defined(simde_math_sin) - #if SIMDE_MATH_BUILTIN_LIBM(sin) - #define simde_math_sin(v) __builtin_sin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sin(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sin(v) sin(v) - #endif -#endif - -#if !defined(simde_math_sinf) - #if SIMDE_MATH_BUILTIN_LIBM(sinf) - #define simde_math_sinf(v) __builtin_sinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinf(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinf(v) sinf(v) - #endif -#endif - -#if !defined(simde_math_sinh) - #if SIMDE_MATH_BUILTIN_LIBM(sinh) - #define simde_math_sinh(v) __builtin_sinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinh(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinh(v) sinh(v) - #endif -#endif - -#if !defined(simde_math_sinhf) - #if SIMDE_MATH_BUILTIN_LIBM(sinhf) - #define simde_math_sinhf(v) __builtin_sinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinhf(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinhf(v) sinhf(v) - #endif -#endif - -#if !defined(simde_math_sqrt) - #if SIMDE_MATH_BUILTIN_LIBM(sqrt) - #define simde_math_sqrt(v) __builtin_sqrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrt(v) 
std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrt(v) sqrt(v) - #endif -#endif - -#if !defined(simde_math_sqrtf) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) - #define simde_math_sqrtf(v) __builtin_sqrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtf(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtf(v) sqrtf(v) - #endif -#endif - -#if !defined(simde_math_sqrtl) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtl) - #define simde_math_sqrtl(v) __builtin_sqrtl(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtl(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtl(v) sqrtl(v) - #endif -#endif - -#if !defined(simde_math_tan) - #if SIMDE_MATH_BUILTIN_LIBM(tan) - #define simde_math_tan(v) __builtin_tan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tan(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tan(v) tan(v) - #endif -#endif - -#if !defined(simde_math_tanf) - #if SIMDE_MATH_BUILTIN_LIBM(tanf) - #define simde_math_tanf(v) __builtin_tanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanf(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanf(v) tanf(v) - #endif -#endif - -#if !defined(simde_math_tanh) - #if SIMDE_MATH_BUILTIN_LIBM(tanh) - #define simde_math_tanh(v) __builtin_tanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanh(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanh(v) tanh(v) - #endif -#endif - -#if !defined(simde_math_tanhf) - #if SIMDE_MATH_BUILTIN_LIBM(tanhf) - #define simde_math_tanhf(v) __builtin_tanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanhf(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanhf(v) tanhf(v) - #endif -#endif - -#if !defined(simde_math_trunc) - #if SIMDE_MATH_BUILTIN_LIBM(trunc) - #define simde_math_trunc(v) __builtin_trunc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_trunc(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_trunc(v) trunc(v) - #endif -#endif - -#if !defined(simde_math_truncf) - #if SIMDE_MATH_BUILTIN_LIBM(truncf) - #define simde_math_truncf(v) __builtin_truncf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_truncf(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_truncf(v) truncf(v) - #endif -#endif - -/*** Comparison macros (which don't raise invalid errors) ***/ - -#if defined(isunordered) - #define simde_math_isunordered(x, y) isunordered(x, y) -#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) - #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) -#else - static HEDLEY_INLINE - int simde_math_isunordered(double x, double y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunordered simde_math_isunordered - - static HEDLEY_INLINE - int simde_math_isunorderedf(float x, float y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunorderedf simde_math_isunorderedf -#endif -#if !defined(simde_math_isunorderedf) - #define simde_math_isunorderedf simde_math_isunordered -#endif - -/*** Additional functions not in libm ***/ - -#if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) - static HEDLEY_INLINE - double - simde_math_cdfnorm(double x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const double a1 = 0.254829592; - static const double a2 = -0.284496736; 
- static const double a3 = 1.421413741; - static const double a4 = -1.453152027; - static const double a5 = 1.061405429; - static const double p = 0.3275911; - - const int sign = x < 0; - x = simde_math_fabs(x) / simde_math_sqrt(2.0); - - /* A&S formula 7.1.26 */ - double t = 1.0 / (1.0 + p * x); - double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); - - return 0.5 * (1.0 + (sign ? -y : y)); - } - #define simde_math_cdfnorm simde_math_cdfnorm -#endif - -#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) - static HEDLEY_INLINE - float - simde_math_cdfnormf(float x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const float a1 = 0.254829592f; - static const float a2 = -0.284496736f; - static const float a3 = 1.421413741f; - static const float a4 = -1.453152027f; - static const float a5 = 1.061405429f; - static const float p = 0.3275911f; - - const int sign = x < 0; - x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); - - /* A&S formula 7.1.26 */ - float t = 1.0f / (1.0f + p * x); - float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); - - return 0.5f * (1.0f + (sign ? -y : y)); - } - #define simde_math_cdfnormf simde_math_cdfnormf -#endif - -#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) - /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ - static HEDLEY_INLINE - double - simde_math_cdfnorminv(double p) { - static const double a[6] = { - -3.969683028665376e+01, - 2.209460984245205e+02, - -2.759285104469687e+02, - 1.383577518672690e+02, - -3.066479806614716e+01, - 2.506628277459239e+00 - }; - - static const double b[5] = { - -5.447609879822406e+01, - 1.615858368580409e+02, - -1.556989798598866e+02, - 6.680131188771972e+01, - -1.328068155288572e+01 - }; - - static const double c[6] = { - -7.784894002430293e-03, - -3.223964580411365e-01, - -2.400758277161838e+00, - -2.549732539343734e+00, - 4.374664141464968e+00, - 2.938163982698783e+00 - }; - - static const double d[4] = { - 7.784695709041462e-03, - 3.224671290700398e-01, - 2.445134137142996e+00, - 3.754408661907416e+00 - }; - - static const double low = 0.02425; - static const double high = 0.97575; - double q, r; - - if (p < 0 || p > 1) { - return 0.0; - } else if (p == 0) { - return -SIMDE_MATH_INFINITY; - } else if (p == 1) { - return SIMDE_MATH_INFINITY; - } else if (p < low) { - q = simde_math_sqrt(-2.0 * simde_math_log(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } -} -#define simde_math_cdfnorminv simde_math_cdfnorminv -#endif - -#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_cdfnorminvf(float p) { - static const float a[6] = { - -3.969683028665376e+01f, - 2.209460984245205e+02f, - -2.759285104469687e+02f, - 1.383577518672690e+02f, - -3.066479806614716e+01f, - 
2.506628277459239e+00f - }; - static const float b[5] = { - -5.447609879822406e+01f, - 1.615858368580409e+02f, - -1.556989798598866e+02f, - 6.680131188771972e+01f, - -1.328068155288572e+01f - }; - static const float c[6] = { - -7.784894002430293e-03f, - -3.223964580411365e-01f, - -2.400758277161838e+00f, - -2.549732539343734e+00f, - 4.374664141464968e+00f, - 2.938163982698783e+00f - }; - static const float d[4] = { - 7.784695709041462e-03f, - 3.224671290700398e-01f, - 2.445134137142996e+00f, - 3.754408661907416e+00f - }; - static const float low = 0.02425f; - static const float high = 0.97575f; - float q, r; - - if (p < 0 || p > 1) { - return 0.0f; - } else if (p == 0) { - return -SIMDE_MATH_INFINITYF; - } else if (p == 1) { - return SIMDE_MATH_INFINITYF; - } else if (p < low) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5f; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } - } - #define simde_math_cdfnorminvf simde_math_cdfnorminvf -#endif - -#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfinv(double x) { - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c - * - * The original answer on SO uses a constant of 0.147, but in my - * testing 0.14829094707965850830078125 gives a lower average absolute error - * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). - * That said, if your goal is to minimize the *maximum* absolute - * error, 0.15449436008930206298828125 provides significantly better - * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); - } - #define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfinvf(float x) { - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); - } - #define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfcinv(double x) { - if(x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - static const double p[6] = { - 0.1550470003116, - 1.382719649631, - 0.690969348887, - -1.128081391617, - 0.680544246825, - -0.16444156791 - }; - static const double q[3] = { - 0.155024849822, - 1.385228141995, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - static const double p[4] = { - 0.00980456202915, - 0.363667889171, - 0.97302949837, - -0.5374947401 - }; - static const double q[3] = { - 0.00980451277802, - 0.363699971544, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } - } - - #define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfcinvf(float x) { - if(x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = { - 0.1550470003116f, - 1.382719649631f, - 0.690969348887f, - -1.128081391617f, - 0.680544246825f - -0.164441567910f - }; - static const float q[3] = { - 0.155024849822f, - 1.385228141995f, - 1.000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = { - 0.00980456202915f, - 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f - }; - static const float q[3] = { - 0.00980451277802f, - 0.36369997154400f, - 1.00000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; - } - } - - #define simde_math_erfcinvf simde_math_erfcinvf -#endif - -static HEDLEY_INLINE -double -simde_math_rad2deg(double radians) { - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE -float -simde_math_rad2degf(float radians) { - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE -double -simde_math_deg2rad(double degrees) { - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE -float -simde_math_deg2radf(float degrees) { - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE -int8_t -simde_math_adds_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); - #else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_adds_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_adds_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_adds_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_adds_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); - #else - uint8_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_adds_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); - #else - uint16_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_adds_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); - #else - uint32_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_adds_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); - #else - uint64_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -int8_t -simde_math_subs_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); - #else - uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
[... deletion of the vendored SIMDe headers continues: the SIMDE_CONSTIFY_* immediate-argument dispatch macros that close simde-constify.h; simde-align.h (the SIMDE_ALIGN_OF / SIMDE_ALIGN_CAP / SIMDE_ALIGN_TO / SIMDE_ALIGN_ASSUME_TO / SIMDE_ALIGN_LIKE / SIMDE_ALIGN_CAST portability macros); common SIMDe configuration code covering the SIMDE_FAST_* math trade-off options, constant and static-assert detection, GCC-style vector-extension detection, the SIMDE_VECTORIZE pragmas, endianness detection and simde_bswap64, the simde_float32/simde_float64 and simde_poly* typedefs, the memcpy/memset/memcmp fallbacks, and the NaN-quieting helpers; simde/check.h (the simde_assert_* macros); and simde/debug-trap.h (simde_trap). All of these lines are deletions in this patch ...]
-#define simde_assert_double(a, op, b) \
- simde_assert_type(double, "g", a, op, b)
-#define simde_assert_ptr(a, op, b) \
- simde_assert_type(const void*, "p", a, op, b)
-
-#define simde_assert_int8(a, op, b) \
- simde_assert_type(int8_t, PRIi8, a, op, b)
-#define simde_assert_uint8(a, op, b) \
- simde_assert_type(uint8_t, PRIu8, a, op, b)
-#define simde_assert_int16(a, op, b) \
- simde_assert_type(int16_t, PRIi16, a, op, b)
-#define simde_assert_uint16(a, op, b) \
- simde_assert_type(uint16_t, PRIu16, a, op, b)
-#define simde_assert_int32(a, op, b) \
- simde_assert_type(int32_t, PRIi32, a, op, b)
-#define simde_assert_uint32(a, op, b) \
- simde_assert_type(uint32_t, PRIu32, a, op, b)
-#define simde_assert_int64(a, op, b) \
- simde_assert_type(int64_t, PRIi64, a, op, b)
-#define simde_assert_uint64(a, op, b) \
- simde_assert_type(uint64_t, PRIu64, a, op, b)
-
-#define simde_assert_ptr_equal(a, b) \
- simde_assert_ptr(a, ==, b)
-#define simde_assert_ptr_not_equal(a, b) \
- simde_assert_ptr(a, !=, b)
-#define simde_assert_null(ptr) \
- simde_assert_ptr(ptr, ==, NULL)
-#define simde_assert_not_null(ptr) \
- simde_assert_ptr(ptr, !=, NULL)
-#define simde_assert_ptr_null(ptr) \
- simde_assert_ptr(ptr, ==, NULL)
-#define simde_assert_ptr_not_null(ptr) \
- simde_assert_ptr(ptr, !=, NULL)
-
-#endif /* !defined(SIMDE_CHECK_H) */
-/* :: End simde/check.h :: */
-
-/* GCC/clang have a bunch of functionality in builtins which we would
- * like to access, but the suffixes indicate whether they operate on
- * int, long, or long long, not fixed width types (e.g., int32_t).
- * we use these macros to attempt to map from fixed-width to the
- * names GCC uses. Note that you should still cast the input(s) and
- * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if
- * types are the same size they may not be compatible according to the
- * compiler. For example, on x86 long and long long are generally
- * both 64 bits, but platforms vary on whether an int64_t is mapped
- * to a long or long long.
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. 
- * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - #include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; - #endif - - #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; - #endif - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; - #endif - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; - #endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde__m64_from_private(simde__m64_private v) { - simde__m64 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64_private -simde__m64_to_private(simde__m64 v) { - simde__m64_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ - SIMDE_FUNCTION_ATTRIBUTES \ - simde__##simde_type \ - simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ - simde__##simde_type##_private r_; \ - r_.isax##_##fragment = value; \ - return simde__##simde_type##_from_private(r_); \ - } \ - \ - SIMDE_FUNCTION_ATTRIBUTES \ - source_type \ - simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ - simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ - return r_.isax##_##fragment; \ - } - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) -#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) -# define _m_paddb(a, b) simde_m_paddb(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_add_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) -# define _m_paddw(a, b) simde_mm_add_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) -# define _m_paddd(a, b) simde_mm_add_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { - r_.i8[i] = INT8_MAX; - } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { - r_.i8[i] = INT8_MIN; - } else { - r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) -# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, 
b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) -# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_and_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - r_.i64[0] = a_.i64[0] & b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -# define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = 
simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) -# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) -# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) -# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) -# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) -# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) -# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtm64_si64 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtm64_si64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i64[0]; - #endif - #endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -# define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi32_si64 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[2] = { a, 0 }; - r_.neon_i32 = vld1_s32(av); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -# define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi64_m64 (int64_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtsi64_m64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); - #else - r_.i64[0] = a; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi64_si32 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_empty (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); - #else - /* noop */ - #endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_empty() simde_mm_empty() -# define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_or_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - r_.i64[0] = a_.i64[0] | b_.i64[0]; 
- #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -# define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to 
[Deleted vendored code, condensed for readability: the remainder of simde/x86/mmx.h is removed wholesale. The deleted hunks contain simde_mm_packs_pu16; the element constructors simde_mm_set_pi8/pi16/pi32, simde_x_mm_set_pu8/pu16/pu32/pi64/f32x2, simde_mm_set1_pi8/pi16/pi32, simde_mm_setr_pi8/pi16/pi32, simde_mm_setzero_si64 and simde_x_mm_setone_si64; the load/store helpers; the complete shift family (sll/slli, srl/srli, sra/srai in 16-, 32- and 64-bit lane widths, register and immediate forms); subtraction and saturating subtraction (sub_pi8/pi16/pi32, subs_pi8/pi16, subs_pu8/pu16); the unpackhi/unpacklo interleaves; simde_mm_xor_si64 and simde_m_to_int; all of their _mm_*/_m_* native-alias macros; the closing include guard of mmx.h; and the opening MIT license header of simde/simde-f16.h. A condensed sketch of the wrappers' common dispatch pattern follows below.]
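Every wrapper in the removed mmx.h follows the same three-way dispatch: use the native x86 intrinsic when it exists, otherwise a NEON (or Loongson/MSA) mapping, otherwise a portable scalar loop. A minimal standalone sketch of that pattern, condensed from the removed simde_mm_sub_pi16 (the vec64 type and the sub_pi16 name are illustrative stand-ins, not simde API):

#include <cstdint>
#include <cstring>
#if defined(__MMX__)
  #include <mmintrin.h>
#elif defined(__ARM_NEON)
  #include <arm_neon.h>
#endif

struct vec64 { int16_t i16[4]; };   // stand-in for simde__m64_private

static vec64 sub_pi16(vec64 a, vec64 b) {
    vec64 r;
#if defined(__MMX__)
    // Native x86 path: call the real intrinsic and copy the bits back out.
    __m64 na, nb;
    std::memcpy(&na, &a, sizeof na);
    std::memcpy(&nb, &b, sizeof nb);
    const __m64 nr = _mm_sub_pi16(na, nb);
    std::memcpy(&r, &nr, sizeof r);
    _mm_empty();                     // leave the MMX/x87 state clean
#elif defined(__ARM_NEON)
    // NEON path: the removed code maps _mm_sub_pi16 to vsub_s16.
    vst1_s16(r.i16, vsub_s16(vld1_s16(a.i16), vld1_s16(b.i16)));
#else
    // Portable fallback: a plain element-wise loop.
    for (int i = 0; i < 4; ++i)
        r.i16[i] = static_cast<int16_t>(a.i16[i] - b.i16[i]);
#endif
    return r;
}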
[Deleted vendored code, condensed for readability: the body of simde/simde-f16.h is removed. It carried the rest of the MIT license text; the SIMDE_FLOAT16_API selection logic (portable bit-manipulation, _Float16, and __fp16 with or without an ABI); the corresponding simde_float16 typedefs and SIMDE_FLOAT16_C() literal macros; the SIMDE_NANHF / SIMDE_INFINITYHF / SIMDE_NINFINITYHF constants; the portable half/single-precision conversions simde_float16_from_float32 and simde_float16_to_float32, based on CC0 code by Fabian Giesen; the simde_isinfhf / simde_isnanhf / simde_isnormalhf / simde_issubnormalhf classification macros; and most of simde_fpclasshf. A standalone transcription of the portable float32-to-float16 conversion follows below.]
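For reference, the portable conversion path that the removed header implemented can be expressed as a small standalone function. This is a sketch transcribed from the deleted simde_float16_from_float32 (the function name and the use of std::memcpy for bit reinterpretation are the only liberties taken); it rounds to nearest-even and handles subnormal results with the "denorm magic" addition trick described in the deleted comments:

#include <cstdint>
#include <cstring>

static uint16_t float32_to_float16_bits(float value) {
    uint32_t f32u;
    std::memcpy(&f32u, &value, sizeof f32u);

    const uint32_t f32_infty    = UINT32_C(255) << 23;        // exponent all ones
    const uint32_t f16_max      = (UINT32_C(127) + 16) << 23; // above this: Inf/NaN result
    const uint32_t denorm_magic = ((UINT32_C(127) - 15) + (23 - 10) + 1) << 23;

    const uint32_t sign = f32u & (UINT32_C(1) << 31);
    f32u ^= sign;                                  // work on the magnitude only

    uint16_t f16u;
    if (f32u > f16_max) {                          // result is Inf or NaN
        f16u = (f32u > f32_infty) ? UINT16_C(0x7e00) : UINT16_C(0x7c00);
    } else if (f32u < (UINT32_C(113) << 23)) {     // result is subnormal or zero
        // One FP add aligns the 10 mantissa bits; round-to-nearest-even does the rest.
        float tmp, magic;
        std::memcpy(&tmp, &f32u, sizeof tmp);
        std::memcpy(&magic, &denorm_magic, sizeof magic);
        tmp += magic;
        std::memcpy(&f32u, &tmp, sizeof f32u);
        f16u = static_cast<uint16_t>(f32u - denorm_magic);
    } else {                                       // normal range
        const uint32_t mant_odd = (f32u >> 13) & 1;            // tie bit for nearest-even
        f32u += (static_cast<uint32_t>(15 - 127) << 23) + UINT32_C(0xfff); // exponent rebias + rounding bias
        f32u += mant_odd;
        f16u = static_cast<uint16_t>(f32u >> 13);
    }
    return static_cast<uint16_t>(f16u | (sign >> 16));
}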
[Deleted vendored code, condensed for readability: the tail of simde/simde-f16.h (the end of simde_fpclasshf and its closing include guard) is removed, followed by the opening of simde/x86/sse.h. The removed sse.h hunks define the simde__m128_private union with NEON, MSA, WASM SIMD128, AltiVec/z-vector and LoongArch LSX views; the per-platform simde__m128 typedef with size and alignment static assertions; the to/from-private and platform conversion helpers; the SIMDE_MM_ROUND_*, SIMDE_MM_FROUND_*, SIMDE_MM_EXCEPT_*, SIMDE_MM_MASK_* and SIMDE_MM_FLUSH_ZERO_* constants together with their _MM_* native aliases; and SIMDE_MM_GET_ROUNDING_MODE / SIMDE_MM_SET_ROUNDING_MODE implemented on non-x86 targets via fegetround/fesetround from <fenv.h>. A minimal sketch of that fenv mapping follows below.]
-SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. 
*/ - #if \ - defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_f32 = vrndiq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndnq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndmq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); - #elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndpq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); - #elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndq_f32(a_.neon_f32); 
- #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); - #elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) -#else - #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps(e3, e2, e1, e0); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; - r_.neon_f32 = vld1q_f32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps1 (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps1(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - (void) a; - return vec_splats(a); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - return (simde__m128)__lsx_vldrepl_w(&a, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_splat(a); - #else - return simde_mm_set_ps(a, a, a, a); - #endif -} -#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps1(a) simde_mm_set_ps1(a) -# define _mm_set1_ps(a) simde_mm_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_move_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_move_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; - r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); - #else - r_.f32[0] = b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_broadcastlow_ps(simde__m128 a) { - /* This function broadcasts the first element in the inpu vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_ss functions since there may be garbage in the upper lanes. */ - - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_shuffle_ps(a, a, 0); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - // the upper values in the result must be the remnants of . 
- r_.neon_f32 = vaddq_f32(a_.neon_f32, value); - #else - r_.f32[0] = a_.f32[0] + b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_and_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_and_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_andnot_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_xor_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_xor_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_or_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_or_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_not_ps(simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* Note: we use ints instead of floats because we don't want cmpeq - * to return false for (NaN, NaN) */ - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - mask_ = simde__m128_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint32_t wa SIMDE_VECTOR(16); - uint32_t wb SIMDE_VECTOR(16); - uint32_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) -# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint16_t wa SIMDE_VECTOR(16); - uint16_t wb SIMDE_VECTOR(16); - uint16_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) -# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_abs_ps(simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - simde_float32 mask_; - uint32_t u32_ = UINT32_C(0x7FFFFFFF); - simde_memcpy(&mask_, &u32_, sizeof(u32_)); - return _mm_and_ps(_mm_set1_ps(mask_), a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vabsq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_abs(a_.altivec_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpge_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpge_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpgt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpgt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vandq_u32(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpunord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpunord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] == b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] >= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] > b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] <= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] < b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] != b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { - simde__m128_private - r_, - dest_ = simde__m128_to_private(dest), - src_ = simde__m128_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); - r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t sign_pos = wasm_f32x4_splat(-0.0f); - r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); - #else - r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); - r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; - r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); - #elif defined(SIMDE_IEEE754_STORAGE) - (void) src_; - (void) dest_; - simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); - r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { - return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_pi2ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); - #else - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_si2ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - r_.i32[1] = a_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvt_ss2si (simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_ss2si(a); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); - #else - simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.i16[i]; - r_.f32[i] = v; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32x2_ps(a, b); - #else - simde__m128_private r_; - simde__m64_private - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); - SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); - #else - r_.f32[0] = (simde_float32) a_.i32[0]; - r_.f32[1] = (simde_float32) a_.i32[1]; - r_.f32[2] = (simde_float32) b_.i32[0]; - r_.f32[3] = (simde_float32) b_.i32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_cvtpi8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); - r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); - r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi16 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi16(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi32(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi8 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi8(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) - /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to - * i16, combine with an all-zero vector of i16 (which will become the upper - * half), narrow to i8. 
*/ - float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); - float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); - float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); - r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) - r_.i8[i] = INT8_MAX; - else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) - r_.i8[i] = INT8_MIN; - else - r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); - } - /* Note: the upper half is undefined */ - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.u16[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtsi32_ss(a, b); - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_ss(a, b); - #else - return _mm_cvtsi64x_ss(a, b); - #endif - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - 
return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm_cvtss_f32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtss_f32(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_f32(a_.neon_f32, 0); - #else - return a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtss_si32 (simde__m128 a) { - return simde_mm_cvt_ss2si(a); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtss_si64(a); - #else - return _mm_cvtss_si64x(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); - #else - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) -# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtt_ss2si (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtt_ss2si(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - simde_float32 v = a_.f32[0]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, v); - #endif - #endif - #endif -} -#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) -# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) - #if defined(__PGI) - return _mm_cvttss_si64x(a); - #else - return _mm_cvttss_si64(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); - float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); - r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) - r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = a_.f32[0] / b_.f32[0]; - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_mm_extract_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private a_ = simde__m64_to_private(a); - return a_.i16[imm8]; -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) -#endif -#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) -# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private - a_ = simde__m64_to_private(a); - - a_.i16[imm8] = i; - - return simde__m64_from_private(a_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) -#endif -#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps(mem_addr); -#else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); - #endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load1_ps (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps1(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_dup_f32(mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); - #else - r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ss (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ss(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); - #else - r_.f32[0] = *mem_addr; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); - #else - simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -/* The SSE documentation says that there are no alignment requirements - for mem_addr. Unfortunately they used the __m64 type for the argument - which is supposed to be 8-byte aligned, so some compilers (like clang - with -Wcast-align) will generate a warning if you try to cast, say, - a simde_float32* to a simde__m64* for this function. - - I think the choice of argument type is unfortunate, but I do think we - need to stick to it here. 
If there is demand I can always add something - like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vld1_f32( - HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); - #else - simde__m64_private b_; - simde_memcpy(&b_, mem_addr, sizeof(b_)); - r_.i32[0] = b_.i32[0]; - r_.i32[1] = b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadr_ps(mem_addr); - #else - simde__m128_private - r_, - v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrev64q_f32(v_.neon_f32); - r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_reve(v_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); - #else - r_.f32[0] = v_.f32[3]; - r_.f32[1] = v_.f32[2]; - r_.f32[2] = v_.f32[1]; - r_.f32[3] = v_.f32[0]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadu_ps(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m64_private - a_ = simde__m64_to_private(a), - mask_ = simde__m64_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) - if (mask_.i8[i] < 0) - mem_addr[i] = a_.i8[i]; - #endif -} -#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) -# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) - r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); - #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) - r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) -# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) -# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - #if defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); - #else - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); - #endif - #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); - r_.f32 = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.f32), - ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | - (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) - ) - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) -# define _m_pminub(a, b) simde_mm_min_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movehl_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vzip2q_u64(b_.neon_u64, a_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a32 = vget_high_f32(a_.neon_f32); - float32x2_t b32 = vget_high_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(b32, a32); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergel(b_.altivec_i64, a_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); - #else - r_.f32[0] = b_.f32[2]; - r_.f32[1] = b_.f32[3]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movelh_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = 
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] == b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] == b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] >= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] >= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] > b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] > b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] <= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] <= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] < b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] < b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] != b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] != b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) -# if defined(__has_builtin) -# if __has_builtin(__builtin_ia32_undef128) -# define SIMDE_HAVE_UNDEFINED128 -# endif -# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) -# define SIMDE_HAVE_UNDEFINED128 -# endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpackhi_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_high_f32(a_.neon_f32); - float32x2_t b1 = vget_high_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = 
__lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpacklo_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_low_f32(a_.neon_f32); - float32x2_t b1 = vget_low_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) || \ - defined(SIMDE_VECTOR_SUBSCRIPT)) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private a_ = simde__m64_to_private(a); - vst1_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), a_.neon_i64); - #else - simde__m64_private* - dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), - a_ = simde__m64_to_private(a); - - dest->i64[0] = a_.i64[0]; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || defined(SIMDE_LOONGARCH_LSX_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_ASSUME_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_ps(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_ps(mem_addr, a) 
simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ - row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - } while (0) -#else - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ - SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ - row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ - row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ - row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ - row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ - } while (0) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE_H) */ -/* :: End simde/x86/sse.h :: */ -#if !defined(SIMDE_X86_AVX_H) -#define SIMDE_X86_AVX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_SSE4_2_H) -#define SIMDE_X86_SSE4_2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#if !defined(SIMDE_X86_SSE4_1_H) -#define SIMDE_X86_SSE4_1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/ssse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSSE3_H) -#define SIMDE_X86_SSSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSE3_H) -#define SIMDE_X86_SSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. 
Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - * 2017 Hasindu Gamaarachchi - * 2018 Jeff Daily - */ - -#if !defined(SIMDE_X86_SSE2_H) -#define SIMDE_X86_SSE2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128i n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - #if defined(__ARM_FP16_FORMAT_IEEE) - SIMDE_ALIGN_TO_16 float16x8_t neon_f16; - #endif - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - 
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128i_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128d n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 
msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128d_private; - -#if defined(SIMDE_X86_SSE2_NATIVE) - typedef __m128i simde__m128i; - typedef __m128d simde__m128d; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int64x2_t simde__m128i; -# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - typedef float64x2_t simde__m128d; -# elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -# else - typedef simde__m128d_private simde__m128d; -# endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128i; - typedef v128_t simde__m128d; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; - #else - typedef simde__m128d_private simde__m128d; - #endif -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128i_private simde__m128i; - typedef simde__m128d_private simde__m128d; -#endif - -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - typedef simde__m128i __m128i; - typedef simde__m128d __m128d; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde__m128i_from_private(simde__m128i_private v) { - simde__m128i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i_private -simde__m128i_to_private(simde__m128i v) { - simde__m128i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde__m128d_from_private(simde__m128d_private v) { - simde__m128d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d_private -simde__m128d_to_private(simde__m128d v) { - simde__m128d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, 
[... diff continues: wholesale deletion of the vendored SIMDe SSE2 emulation header. The removed lines in this range are verbatim third-party SIMDe code — portable simde_mm_* fallbacks for _mm_set_pd/_mm_set1_pd, the simde_x_mm_abs_pd/not_pd/select_pd/broadcastlow_pd helpers, integer and double add/adds/avg, bitwise and/andnot/xor, byte shifts (bslli/bsrli_si128), scalar comi*/ cmp* comparisons, and the cast helpers — with no package-authored changes; elided here ...]
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_pd(a); - #else - simde__m128d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtepi32_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_ps(a); - #else - simde__m128_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #pragma clang diagnostic ignored "-Wc11-extensions" - #endif - r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = simde_math_round(a_.f64[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) - return _mm_cvtpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvtpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) - float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; - r_.f32 = - __builtin_shufflevector( - __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, - 0, 1, 2, 3 - ); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); - r_.f32[2] = SIMDE_FLOAT32_C(0.0); - r_.f32[3] = SIMDE_FLOAT32_C(0.0); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtpi32_pd (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_pd(a); - #else - simde__m128d_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) - a_ = simde__m128_to_private(a); - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - #else - a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_pd(a); - #else - simde__m128d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 v = simde_math_round(a_.f64[0]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsd_si64x(a); - #else - return _mm_cvtsd_si64(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); - #endif -} -#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) - #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); - - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i]; - } - #endif - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_x_mm_cvtsi128_si16 (simde__m128i a) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s16(a_.neon_i16, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i16, 0); - #else - return a_.i16[0]; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi128_si32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi128_si32(a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s32(a_.neon_i32, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i32, 0); - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsi128_si64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsi128_si64x(a); - #else - return _mm_cvtsi128_si64(a); - #endif - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); - #endif - return a_.i64[0]; - #endif -} -#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) - #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_sd(a, b); - #else - simde__m128d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.i64[1] = a_.i64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cvtsi16_si128 (int16_t a) { - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); - #else - r_.i16[0] = a; - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - r_.i16[4] = 0; - r_.i16[5] = 0; - r_.i16[6] = 0; - r_.i16[7] = 0; - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi32_si128 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_si128(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_sd(a, b); - #else - return _mm_cvtsi64x_sd(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) - #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi64_si128 (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_si128(a); - #else - return _mm_cvtsi64x_si128(a); - #endif - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i64x2_make(a, 0); - #else - r_.i64[0] = a; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) - #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtss_sd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); - return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); - - return simde__m128d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvttpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvttpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = a_.f64[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvttpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - /* Values below INT32_MIN saturate anyways, so we don't need to - * test for that. 
*/ - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = - vandq_u32( - vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), - vceqq_f32(a_.neon_f32, a_.neon_f32) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); - #endif - - r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - v128_t valid_input = - wasm_v128_and( - wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), - wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); - #endif - - r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); - #endif - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; - - __typeof__(r_.i32) valid_input = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.i32), - (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) - ); - #elif !defined(SIMDE_FAST_NANS) - __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); - #endif - - __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; - r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); - #endif - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvttsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvttsd_si64(a); - #else - return _mm_cvttsd_si64x(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); - #endif -} -#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) - #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] / b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - uint16_t r; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); - #else - r = a_.u16[imm8 & 7]; - #endif - - return HEDLEY_STATIC_CAST(int32_t, r); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) - #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m128i_private a_ = simde__m128i_to_private(a); - a_.i16[imm8 & 7] = i; - return simde__m128i_from_private(a_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load1_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load1_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); - #else - return simde_mm_set1_pd(*mem_addr); - #endif -} -#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) - #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_sd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_sd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = UINT64_C(0); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_load_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadh_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); - #else - simde_float64 t; - - simde_memcpy(&t, mem_addr, sizeof(t)); - r_.f64[0] = a_.f64[0]; - r_.f64[1] = t; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_epi64(mem_addr); - #else - simde__m128i_private r_; - - int64_t value; - simde_memcpy(&value, mem_addr, sizeof(value)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); - #else - r_.i64[0] = value; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vld1_f64( - HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = a_.u64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadr_pd(mem_addr); - #else - simde__m128d_private - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); - r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t tmp = 
wasm_v128_load(mem_addr); - r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); - #else - r_.f64[0] = mem_addr[1]; - r_.f64[1] = mem_addr[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld1q_f64(mem_addr); - #else - simde__m128d_private r_; - - simde_memcpy(&r_, mem_addr, sizeof(r_)); - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi8 - #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi16 - #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi32(void const * mem_addr) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi32 - #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi64 - #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si128 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); - #else - simde__m128i_private r_; - - #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_PACKED_ - struct simde_mm_loadu_si128_s { - __typeof__(r_) v; - } __attribute__((__packed__, __may_alias__)); - r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_madd_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpaddq_s32(pl, ph); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); - int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); - int32x2_t rh = 
vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); - r_.neon_i32 = vcombine_s32(rl, rh); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) a32, b32, p32; - SIMDE_CONVERT_VECTOR_(a32, a_.i16); - SIMDE_CONVERT_VECTOR_(b32, b_.i16); - p32 = a32 * b32; - r_.i32 = - __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + - __builtin_shufflevector(p32, p32, 1, 3, 5, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - if (mask_.u8[i] & 0x80) { - mem_addr[i] = a_.i8[i]; - } - } - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) - /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ - return _mm_movemask_epi8(a); - #else - int32_t r = 0; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ - static const uint8_t md[16] = { - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - }; - - /* Extend sign bit over entire lane */ - uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); - /* Clear all but the bit we're interested in. 
*/ - uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); - /* Alternate bytes from low half and high half */ - uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); - uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vaddvq_u16(x); - #else - uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[15 - i] >> 7) << (15 - i); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_movemask_pd(a); - #else - int32_t r = 0; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + - (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 
[Elided: remainder of the vendored SIMDE SSE2 emulation header, deleted in full by this patch.
Every removed function follows the same pattern -- a native _mm_* fast path guarded by
SIMDE_X86_SSE2_NATIVE, then NEON / WASM SIMD128 / AltiVec / z-vector alternatives, a scalar
fallback loop, and a SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES macro that maps the _mm_* name onto
the simde_mm_* implementation. Functions removed in this hunk:

  move/pack:    simde_mm_movepi64_pi64, simde_mm_movpi64_epi64, simde_mm_move_epi64,
                simde_mm_packs_epi16, simde_mm_packs_epi32, simde_mm_packus_epi16
  min/max:      simde_mm_min_epi16, simde_mm_min_epu8, simde_mm_min_pd, simde_mm_min_sd,
                simde_mm_max_epi16, simde_mm_max_epu8, simde_mm_max_pd, simde_mm_max_sd
  multiply:     simde_mm_mul_epu32, simde_mm_mul_pd, simde_mm_mul_sd, simde_mm_mul_su32,
                simde_mm_mulhi_epi16, simde_mm_mulhi_epu16, simde_mm_mullo_epi16,
                simde_x_mm_mul_epi64, simde_x_mm_mod_epi64
  bitwise/misc: simde_mm_or_pd, simde_mm_or_si128, simde_mm_pause, simde_mm_sad_epu8
  set/load:     simde_mm_set_epi8/16/32/64/64x, simde_mm_set_sd,
                simde_mm_set1_epi8/16/32/64/64x, simde_x_mm_set_epu8/16/32/64x,
                simde_x_mm_set1_epu8/16/32/64, simde_mm_setr_epi8/16/32/64, simde_mm_setr_pd,
                simde_mm_setzero_pd, simde_mm_undefined_pd, simde_mm_undefined_si128,
                simde_x_mm_setone_pd, simde_x_mm_setone_si128,
                simde_mm_loadu_si16, simde_mm_loadu_si32, simde_mm_loadu_si64
  shuffle:      simde_mm_shuffle_epi32, simde_mm_shuffle_pd, simde_mm_shufflehi_epi16,
                simde_mm_shufflelo_epi16
  shift:        simde_mm_sll_epi16/32/64, simde_mm_srl_epi16/32/64, simde_mm_sra_epi16/32,
                simde_mm_srai_epi16/32, simde_mm_slli_epi16
  math:         simde_mm_sqrt_pd, simde_mm_sqrt_sd

The deleted header continues below.]
\ - (a) : \ - simde__m128i_from_neon_i16( \ - ((imm8) > 15) ? \ - vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ - vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i32( \ - ((imm8) > 31) ? \ - vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ - vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sl(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 63))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i64( \ - ((imm8) > 63) ? \ - vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ - vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u16( \ - ((imm8) > 15) ? \ - vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ - vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u32( \ - ((imm8) > 31) ? \ - vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ - vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sr(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); - #else - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u64( \ - ((imm8) > 63) ? \ - vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ - vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store1_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); - #else - mem_addr[0] = a_.f64[0]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) - #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_sd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); - simde_memcpy(mem_addr, &v, sizeof(v)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); - simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde_float64 v = a_.f64[0]; - simde_memcpy(mem_addr, &v, sizeof(simde_float64)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void - simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeh_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - *mem_addr = a_.f64[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int64_t tmp; - - /* memcpy to prevent aliasing, tmp because we can't take the - * address of a vector element. */ - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - tmp = vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - tmp = vec_extract(a_.altivec_i64, 0); - #else - tmp = a_.i64[0]; - #endif - - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_pd(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 tmp; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - tmp = vgetq_lane_f64(a_.neon_f64, 0); - #else - tmp = a_.f64[0]; - #endif - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storer_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #else - mem_addr[0] = a_.f64[1]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si16(mem_addr, a); - #else - int16_t val = simde_x_mm_cvtsi128_si16(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si32(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); - #else - int32_t val = simde_mm_cvtsi128_si32(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si64(mem_addr, a); - #else - int64_t val = simde_mm_cvtsi128_si64(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_pd(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_si128(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-void -simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_si32(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_s32(mem_addr, vdupq_n_s32(a), 0); - #else - *mem_addr = a; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) - _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s64(mem_addr, vdup_n_s64(a)); - #else - *mem_addr = a; - #endif -} -#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) - #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] - b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m64 -simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] - b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] == b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] == b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] >= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] >= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > 
wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] > b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] > b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] <= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] <= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] < b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] < b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] != b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] != b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_lfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_lfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_lfence() simde_mm_lfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_mfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_mfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mfence() simde_mm_mfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_high_s16(a_.neon_i16); - int16x4_t b1 = vget_high_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi16(a, b) 
simde_mm_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_high_s32(a_.neon_i32); - int32x2_t b1 = vget_high_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_h = vget_high_s64(a_.neon_i64); - int64x1_t b_h = vget_high_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_h, b_h); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi8 (simde__m128i a, 
simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i]; - r_.i8[(i * 2) + 1] = b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_low_s16(a_.neon_i16); - int16x4_t b1 = vget_low_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i]; - r_.i16[(i * 2) + 1] = b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_low_s32(a_.neon_i32); - int32x2_t b1 = vget_low_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i]; - r_.i32[(i * 2) + 1] = b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_l = vget_low_s64(a_.neon_i64); - int64x1_t b_l = vget_low_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_l, b_l); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i]; - r_.i64[(i * 2) + 1] = b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i]; - r_.f64[(i * 2) + 1] = b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_negate_pd(simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - r_.altivec_f64 = vec_neg(a_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vnegq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); - #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_not_si128 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_ternarylogic_epi32(a, a, a, 0x55); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/sse2.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i]; - r_.i16[i + halfway_point] = b_.i16[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i + 1]; - r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi32 
(simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i]; - r_.i32[i + halfway_point] = b_.i32[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i + 1]; - r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i]; - r_.f32[i + halfway_point] = b_.f32[2 * i]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i + 1]; - r_.f32[i + halfway_point] = 
b_.f32[2 * i + 1]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i]; - r_.f64[i + halfway_point] = b_.f64[2 * i]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i + 1]; - r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); - float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); - return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); - #else - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; - r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); - float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); - return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); - #else - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; - r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; - } - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_ps(a, b) simde_mm_addsub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_pd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); - #else - return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); - #else - return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_pd(a, b); - #else - return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); - #else - return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_lddqu_si128(mem_addr); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loaddup_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_loaddup_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(*mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.f64[1] = *mem_addr; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_movedup_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movedup_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - r_.f64[0] = a_.f64[0]; - r_.f64[1] = a_.f64[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehdup_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movehdup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); - #else - r_.f32[0] = a_.f32[1]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_moveldup_ps (simde__m128 a) { - #if defined(SIMDE__SSE3_NATIVE) - return _mm_moveldup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[0]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[2]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE3_H) */ -/* :: End simde/x86/sse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi8(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabsq_s8(a_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_abs(a_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / 
sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vabsq_s16(a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_abs(a_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); - return _mm_sub_epi32(_mm_xor_si128(a, m), m); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vabsq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_abs(a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_PUSH - #pragma warning(disable:4146) - #endif - r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_POP - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi8(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabs_s8(a_.neon_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? 
(- a_.i8[i]) : a_.i8[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi16 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi16(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vabs_s16(a_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi32 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi32(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vabs_s32(a_.neon_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? (- a_.i32[i]) : a_.i32[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - if (HEDLEY_UNLIKELY(count > 31)) - return simde_mm_setzero_si128(); - - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 31) { - r_.i8[i] = 0; - } else if (srcpos > 15) { - r_.i8[i] = a_.i8[(srcpos) & 15]; - } else { - r_.i8[i] = b_.i8[srcpos]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSSE3_NATIVE) - #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_alignr_epi8(a, b, count) \ - ( \ - ((count) > 31) \ - ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ - : ( \ - ((count) > 15) \ - ? 
(simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ - : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) -#endif -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) - #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) - SIMDE_REQUIRE_CONSTANT(count) { - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 15) { - r_.i8[i] = 0; - } else if (srcpos > 7) { - r_.i8[i] = a_.i8[(srcpos) & 7]; - } else { - r_.i8[i] = b_.i8[srcpos]; - } - } - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) -# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_alignr_pi8(a, b, count) \ - ( \ - ((count) > 15) \ - ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ - : ( \ - ((count) > 7) \ - ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ - : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) -#endif -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_shuffle_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Mask out the bits we're not interested in. vtbl will result in 0 - * for any values outside of [0, 15], so if the high bit is set it - * will return 0, just like in SSSE3. 
*/ - b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); - - /* Convert a from an int8x16_t to an int8x8x2_t */ - int8x8x2_t i; - i.val[0] = vget_low_s8(a_.neon_i8); - i.val[1] = vget_high_s8(a_.neon_i8); - - /* Table lookups */ - int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); - int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); - - r_.neon_i8 = vcombine_s8(l, h); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - /* This is a bit ugly because of the casts and the awful type - * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just - * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ - SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; - SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); - SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); - r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_swizzle( - a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); - } - #endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_shuffle_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); - r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); - #else - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadd_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); - #else - return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadd_epi32(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); - return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); - #else - return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadd_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); - #else - r_.i16[0] = a_.i16[0] + a_.i16[1]; - r_.i16[1] = a_.i16[2] + a_.i16[3]; - r_.i16[2] = b_.i16[0] + b_.i16[1]; - r_.i16[3] = b_.i16[2] + b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadd_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[0] + a_.i32[1]; - r_.i32[1] = b_.i32[0] + b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadds_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); - #else - return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadds_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); - #else - for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); - r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; - int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); - r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsub_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); - #else - return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsub_epi32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); - return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); - #else - return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsub_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); - #else - r_.i16[0] = a_.i16[0] - a_.i16[1]; - r_.i16[1] = a_.i16[2] - a_.i16[3]; - r_.i16[2] = b_.i16[0] - b_.i16[1]; - r_.i16[3] = b_.i16[2] - b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsub_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - - SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[0] - a_.i32[1]; - r_.i32[1] = b_.i32[0] - b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsubs_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); - #else - return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); - #else - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); - r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_maddubs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Zero extend a */ - int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); - int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); - - /* Sign extend by shifting left then shifting right. */ - int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); - int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); - - /* multiply */ - int16x8_t prod1 = vmulq_s16(a_even, b_even); - int16x8_t prod2 = vmulq_s16(a_odd, b_odd); - - /* saturated add */ - r_.neon_i16 = vqaddq_s16(prod1, prod2); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_maddubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); - int16x8_t bi = vmovl_s8(b_.neon_i8); - int16x8_t p = vmulq_s16(ai, bi); - int16x4_t l = vget_low_s16(p); - int16x4_t h = vget_high_s16(p); - r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_mulhrs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), - vget_low_s16(b_.neon_i16)); - int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), - vget_high_s16(b_.neon_i16)); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); - int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); - - /* Join together */ - r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); - v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); - const v128_t __inc = wasm_i32x4_splat(0x4000); - __lo = wasm_i32x4_add(__lo, __inc); - __hi = wasm_i32x4_add(__hi, __inc); - __lo = wasm_i32x4_add(__lo, __lo); - __hi = wasm_i32x4_add(__hi, __hi); - r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhrs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - 
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow = vrshrn_n_s32(mul, 15); - - /* Join together */ - r_.neon_i16 = narrow; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); - uint8x16_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s8(b_.neon_i8); - #else - bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); - #endif - bnz_mask = vmvnq_u8(bnz_mask); - - r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); - simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); - uint16x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s16(b_.neon_i16); - #else - bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); - #endif - bnz_mask = vmvnq_u16(bnz_mask); - - r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); - simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); - uint32x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s32(b_.neon_i32); - #else - bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); - #endif - bnz_mask = vmvnq_u32(bnz_mask); - - r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); - simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); - uint8x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s8(b_.neon_i8); - #else - bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); - #endif - bnz_mask = vmvn_u8(bnz_mask); - - r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); - uint16x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s16(b_.neon_i16); - #else - bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); - #endif - bnz_mask = vmvn_u16(bnz_mask); - - r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); - uint32x2_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s32(b_.neon_i32); - #else - bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); - #endif - bnz_mask = vmvn_u32(bnz_mask); - - r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/ssse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) -# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_epi16(a, b, imm8) \ - (__extension__ ({ \ - simde__m128i_private \ - simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ - simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ - simde_mm_blend_epi16_r_; \ - \ - simde_mm_blend_epi16_r_.i16 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 16, 16, \ - simde_mm_blend_epi16_a_.i16, \ - simde_mm_blend_epi16_b_.i16, \ - ((imm8) & (1 << 0)) ? 8 : 0, \ - ((imm8) & (1 << 1)) ? 9 : 1, \ - ((imm8) & (1 << 2)) ? 10 : 2, \ - ((imm8) & (1 << 3)) ? 11 : 3, \ - ((imm8) & (1 << 4)) ? 12 : 4, \ - ((imm8) & (1 << 5)) ? 13 : 5, \ - ((imm8) & (1 << 6)) ? 14 : 6, \ - ((imm8) & (1 << 7)) ? 15 : 7 \ - ); \ - \ - simde__m128i_from_private(simde_mm_blend_epi16_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_epi16 - #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
[... remainder of the deleted vendored SIMDE SSE4.1 compatibility header, removed as part of dropping the SIMDE dependency. The deleted hunks are the simde_mm_* shims and their SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES macro redirects for: _mm_blend_pd/_mm_blend_ps, _mm_blendv_epi8/pd/ps (plus internal epi16/epi32/epi64 helpers), _mm_round_pd, _mm_ceil_pd/ps/sd/ss, _mm_cmpeq_epi64, the _mm_cvtepi8/16/32_* and _mm_cvtepu8/16/32_* widening conversions, _mm_dp_pd/_mm_dp_ps, _mm_extract_epi8/epi32/epi64 and _mm_extract_ps, _mm_floor_pd/ps/sd/ss, _mm_insert_epi8/epi32/epi64 and _mm_insert_ps, _mm_max_epi8/epi32/epu16/epu32, _mm_min_epi8/epi32/epu16/epu32, _mm_minpos_epu16, _mm_mpsadbw_epu8, _mm_mul_epi32/_mm_mullo_epi32 (and the simde_x_mm_mullo_epu32 helper), _mm_packus_epi32, _mm_round_sd/_mm_round_ss, _mm_stream_load_si128, and _mm_test_all_ones/_mm_test_all_zeros. Each shim forwards to the native SSE4.1 intrinsic when the target exposes it and otherwise falls back to NEON, WASM SIMD128, AltiVec/z13, or a portable per-element loop. ...]
-#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_zeros - #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_mix_ones_zeros(a, mask); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); - int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); - return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); - long long c0 = wasm_i64x2_extract_lane(m, 0); - long long c1 = wasm_i64x2_extract_lane(m, 1); - long long ones = c0 | c1; - long long zeros = ~(c0 & c1); - return ones && zeros; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) - if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) - return 1; - - return 0; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_mix_ones_zeros - #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #else - int_fast32_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= ~a_.i32f[i] & b_.i32f[i]; - } - - return HEDLEY_STATIC_CAST(int, !r); - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_si128 - #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testnzc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); - int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !( !(vgetq_lane_s64(s641, 0) || vgetq_lane_s64(s641, 1)) \ - || !(vgetq_lane_s64(s640, 0) || vgetq_lane_s64(s640, 1)) ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ - && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) - return 1; - } - - return 0; - #endif - #endif -} -#if 
defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_si128 - #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testz_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #elif defined(SIMDE_HAVE_INT128_) - if ((a_.u128[0] & b_.u128[0]) == 0) { - return 1; - } - return 0; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if ((a_.u64[i] & b_.u64[i]) > 0) - return 0; - } - #endif - - return 1; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_si128 - #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_1_H) */ -/* :: End simde/x86/sse4.1.h :: */ - -#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS - #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS - #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS - #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS - #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY - #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES - #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH - #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED - #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY - #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY - #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT - #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT - #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK - #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK -#else - #define SIMDE_SIDD_UBYTE_OPS 0x00 - #define SIMDE_SIDD_UWORD_OPS 0x01 - #define SIMDE_SIDD_SBYTE_OPS 0x02 - #define SIMDE_SIDD_SWORD_OPS 0x03 - #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 - #define SIMDE_SIDD_CMP_RANGES 0x04 - #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 - #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c - #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 - #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 - #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 - #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 - #define SIMDE_SIDD_BIT_MASK 0x00 - #define SIMDE_SIDD_UNIT_MASK 0x40 -#endif - -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) - #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS - #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS - #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS - #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS - #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY - #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES - #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH - #define 
_SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED - #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY - #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY - #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY - #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY - #define _SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT - #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT - #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK - #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ - _mm_cmpestrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrs - #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 
16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ - _mm_cmpestrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrz - #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_cmpgt_epi64(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://stackoverflow.com/a/65175746/501126 */ - __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); - r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); - return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://stackoverflow.com/a/65223269/501126 */ - r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpgt_epi64 - #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_8_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 8) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i8[i]) - a_invalid = 1; - } - return a_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_16_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 16) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i16[i]) - a_invalid = 1; - } - return a_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrs(a, b, imm8) \ - _mm_cmpistrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrs(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? 
simde_mm_cmpistrs_16_((a)) \ - : simde_mm_cmpistrs_8_((a))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrs - #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_8_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 8) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i8[i]) - b_invalid = 1; - } - return b_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_16_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 16) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i16[i]) - b_invalid = 1; - } - return b_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrz(a, b, imm8) \ - _mm_cmpistrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrz(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? simde_mm_cmpistrz_16_((b)) \ - : simde_mm_cmpistrz_8_((b))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrz - #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u8(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cb(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc ^= v; - for(int bit = 0 ; bit < 8 ; bit++) { - if (crc & 1) - crc = (crc >> 1) ^ UINT32_C(0x82f63b78); - else - crc = (crc >> 1); - } - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u16(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32ch(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u8(crc, v & 0xff); - crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u32(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cw(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u16(crc, v & 0xffff); - crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) - return _mm_crc32_u64(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return 
__crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); - #else - uint64_t crc = prevcrc; - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_2_H) */ -/* :: End simde/x86/sse4.2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; - SIMDE_ALIGN_TO_32 simde__m128 m128[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256 n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} 
simde__m256_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; - SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256d n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256d_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 
SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_32 simde_float16 f16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 simde_float16 f16[16]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float16 f16[16]; - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; - SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256i n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256i_private; - -#if defined(SIMDE_X86_AVX_NATIVE) - typedef __m256 simde__m256; - typedef __m256i simde__m256i; - typedef __m256d simde__m256d; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; -#else - typedef simde__m256_private simde__m256; - typedef simde__m256i_private simde__m256i; - typedef simde__m256d_private simde__m256d; -#endif - -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) - typedef simde__m256 __m256; - typedef simde__m256i __m256i; - typedef simde__m256d __m256d; - #else - #undef __m256 - #define __m256 simde__m256 - #undef __m256i - #define __m256i simde__m256i - #undef __m256d - #define __m256d simde__m256d - #endif -#endif - -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); 
-HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde__m256_from_private(simde__m256_private v) { - simde__m256 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256_private -simde__m256_to_private(simde__m256 v) { - simde__m256_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde__m256i_from_private(simde__m256i_private v) { - simde__m256i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i_private -simde__m256i_to_private(simde__m256i v) { - simde__m256i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde__m256d_from_private(simde__m256d_private v) { - simde__m256d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d_private -simde__m256d_to_private(simde__m256d v) { - simde__m256d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_CMP_EQ_OQ 0 -#define SIMDE_CMP_LT_OS 1 -#define SIMDE_CMP_LE_OS 2 -#define SIMDE_CMP_UNORD_Q 3 -#define SIMDE_CMP_NEQ_UQ 4 -#define SIMDE_CMP_NLT_US 5 -#define SIMDE_CMP_NLE_US 6 -#define SIMDE_CMP_ORD_Q 7 -#define SIMDE_CMP_EQ_UQ 8 -#define SIMDE_CMP_NGE_US 9 -#define SIMDE_CMP_NGT_US 10 -#define SIMDE_CMP_FALSE_OQ 11 -#define SIMDE_CMP_NEQ_OQ 12 -#define SIMDE_CMP_GE_OS 13 -#define SIMDE_CMP_GT_OS 14 -#define SIMDE_CMP_TRUE_UQ 15 -#define SIMDE_CMP_EQ_OS 16 -#define SIMDE_CMP_LT_OQ 17 -#define SIMDE_CMP_LE_OQ 18 -#define SIMDE_CMP_UNORD_S 19 -#define SIMDE_CMP_NEQ_US 20 -#define SIMDE_CMP_NLT_UQ 21 -#define SIMDE_CMP_NLE_UQ 22 -#define SIMDE_CMP_ORD_S 23 -#define SIMDE_CMP_EQ_US 24 -#define SIMDE_CMP_NGE_UQ 25 -#define SIMDE_CMP_NGT_UQ 26 -#define SIMDE_CMP_FALSE_OS 27 -#define SIMDE_CMP_NEQ_OS 28 -#define SIMDE_CMP_GE_OQ 29 -#define SIMDE_CMP_GT_OQ 30 -#define SIMDE_CMP_TRUE_US 31 - -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) -#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ -#define _CMP_LT_OS SIMDE_CMP_LT_OS -#define _CMP_LE_OS SIMDE_CMP_LE_OS -#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q -#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ -#define _CMP_NLT_US SIMDE_CMP_NLT_US -#define _CMP_NLE_US SIMDE_CMP_NLE_US -#define _CMP_ORD_Q SIMDE_CMP_ORD_Q -#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ -#define _CMP_NGE_US SIMDE_CMP_NGE_US -#define _CMP_NGT_US SIMDE_CMP_NGT_US -#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ -#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ -#define _CMP_GE_OS SIMDE_CMP_GE_OS -#define _CMP_GT_OS SIMDE_CMP_GT_OS -#define _CMP_TRUE_UQ 
SIMDE_CMP_TRUE_UQ -#define _CMP_EQ_OS SIMDE_CMP_EQ_OS -#define _CMP_LT_OQ SIMDE_CMP_LT_OQ -#define _CMP_LE_OQ SIMDE_CMP_LE_OQ -#define _CMP_UNORD_S SIMDE_CMP_UNORD_S -#define _CMP_NEQ_US SIMDE_CMP_NEQ_US -#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ -#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ -#define _CMP_ORD_S SIMDE_CMP_ORD_S -#define _CMP_EQ_US SIMDE_CMP_EQ_US -#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ -#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ -#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS -#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS -#define _CMP_GE_OQ SIMDE_CMP_GE_OQ -#define _CMP_GT_OQ SIMDE_CMP_GT_OQ -#define _CMP_TRUE_US SIMDE_CMP_TRUE_US -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castps_pd (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps_pd(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps_pd - #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castps_si256 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps_si256(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps_si256 - #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castsi256_pd (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_pd(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_pd - #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castsi256_ps (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_ps(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_ps - #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castpd_ps (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd_ps(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd_ps - #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castpd_si256 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd_si256(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd_si256 - #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setzero_si256 (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_si256(); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_setzero_si128(); - r_.m128i[1] = simde_mm_setzero_si128(); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = 0; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_si256 - #define _mm256_setzero_si256() simde_mm256_setzero_si256() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 
-simde_mm256_setzero_ps (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_ps(); - #else - return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_ps - #define _mm256_setzero_ps() simde_mm256_setzero_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setzero_pd (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_pd(); - #else - return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_pd - #define _mm256_setzero_pd() simde_mm256_setzero_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_not_ps(simde__m256 a) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); - r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm256_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_ps(a, b, mask); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - mask_ = simde__m256_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); - r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_not_pd(simde__m256d a) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = ~a_.i64; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); - r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ~(a_.i64[i]); - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm256_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. 
- * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_pd(a, b, mask); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - mask_ = simde__m256d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); - r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_setone_si256 (void) { - simde__m256i_private r_; - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - __typeof__(r_.i32f) rv = { 0, }; - r_.i32f = ~rv; -#elif defined(SIMDE_X86_AVX2_NATIVE) - __m256i t = _mm256_setzero_si256(); - r_.n = _mm256_cmpeq_epi32(t, t); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - } -#endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_setone_ps (void) { - return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_setone_pd (void) { - return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, - int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, - int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi8( - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16); - #else - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - r_.i8[16] = e16; - r_.i8[17] = e17; - r_.i8[18] = e18; - r_.i8[19] = e19; - r_.i8[20] = e20; - r_.i8[21] = e21; - r_.i8[22] = e22; - r_.i8[23] = e23; - r_.i8[24] = e24; - r_.i8[25] = e25; - r_.i8[26] = e26; - r_.i8[27] = e27; - r_.i8[28] = e28; - r_.i8[29] = e29; - r_.i8[30] = e30; - r_.i8[31] = e31; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi8 - #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ 
- simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, - int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); - #else - r_.i16[ 0] = e0; - r_.i16[ 1] = e1; - r_.i16[ 2] = e2; - r_.i16[ 3] = e3; - r_.i16[ 4] = e4; - r_.i16[ 5] = e5; - r_.i16[ 6] = e6; - r_.i16[ 7] = e7; - r_.i16[ 8] = e8; - r_.i16[ 9] = e9; - r_.i16[10] = e10; - r_.i16[11] = e11; - r_.i16[12] = e12; - r_.i16[13] = e13; - r_.i16[14] = e14; - r_.i16[15] = e15; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi16 - #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, - int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); - #else - r_.i32[ 0] = e0; - r_.i32[ 1] = e1; - r_.i32[ 2] = e2; - r_.i32[ 3] = e3; - r_.i32[ 4] = e4; - r_.i32[ 5] = e5; - r_.i32[ 6] = e6; - r_.i32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi32 - #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi64x(e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi64x(e1, e0); - r_.m128i[1] = simde_mm_set_epi64x(e3, e2); - #else - r_.i64[0] = e0; - r_.i64[1] = e1; - r_.i64[2] = e2; - r_.i64[3] = e3; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi64x - #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, - uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, - uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, - uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, - uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, - uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, - uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m256i_private r_; - - r_.u8[ 0] = e0; - 
r_.u8[ 1] = e1; - r_.u8[ 2] = e2; - r_.u8[ 3] = e3; - r_.u8[ 4] = e4; - r_.u8[ 5] = e5; - r_.u8[ 6] = e6; - r_.u8[ 7] = e7; - r_.u8[ 8] = e8; - r_.u8[ 9] = e9; - r_.u8[10] = e10; - r_.u8[11] = e11; - r_.u8[12] = e12; - r_.u8[13] = e13; - r_.u8[14] = e14; - r_.u8[15] = e15; - r_.u8[16] = e16; - r_.u8[17] = e17; - r_.u8[18] = e18; - r_.u8[19] = e19; - r_.u8[20] = e20; - r_.u8[20] = e20; - r_.u8[21] = e21; - r_.u8[22] = e22; - r_.u8[23] = e23; - r_.u8[24] = e24; - r_.u8[25] = e25; - r_.u8[26] = e26; - r_.u8[27] = e27; - r_.u8[28] = e28; - r_.u8[29] = e29; - r_.u8[30] = e30; - r_.u8[31] = e31; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, - uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, - uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, - uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m256i_private r_; - - r_.u16[ 0] = e0; - r_.u16[ 1] = e1; - r_.u16[ 2] = e2; - r_.u16[ 3] = e3; - r_.u16[ 4] = e4; - r_.u16[ 5] = e5; - r_.u16[ 6] = e6; - r_.u16[ 7] = e7; - r_.u16[ 8] = e8; - r_.u16[ 9] = e9; - r_.u16[10] = e10; - r_.u16[11] = e11; - r_.u16[12] = e12; - r_.u16[13] = e13; - r_.u16[14] = e14; - r_.u16[15] = e15; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, - uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), - HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); - #else - r_.u32[ 0] = e0; - r_.u32[ 1] = e1; - r_.u32[ 2] = e2; - r_.u32[ 3] = e3; - r_.u32[ 4] = e4; - r_.u32[ 5] = e5; - r_.u32[ 6] = e6; - r_.u32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { - simde__m256i_private r_; - - r_.u64[0] = e0; - r_.u64[1] = e1; - r_.u64[2] = e2; - r_.u64[3] = e3; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); - r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - r_.f32[4] = e4; - r_.f32[5] = e5; - r_.f32[6] = e6; - r_.f32[7] = e7; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_ps - #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ - 
simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_pd(e3, e2, e1, e0); - #else - simde__m256d_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_set_pd(e1, e0); - r_.m128d[1] = simde_mm_set_pd(e3, e2); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - r_.f64[2] = e2; - r_.f64[3] = e3; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_pd - #define _mm256_set_pd(e3, e2, e1, e0) \ - simde_mm256_set_pd(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); - #else - simde__m256_private r_; - simde__m128_private - e1_ = simde__m128_to_private(e1), - e0_ = simde__m128_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128_private[0] = e0_; - r_.m128_private[1] = e1_; - #elif defined(SIMDE_HAVE_INT128_) - r_.i128[0] = e0_.i128[0]; - r_.i128[1] = e1_.i128[0]; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128 - #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); - #else - simde__m256d_private r_; - simde__m128d_private - e1_ = simde__m128d_to_private(e1), - e0_ = simde__m128d_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d_private[0] = e0_; - r_.m128d_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128d - #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); - #else - simde__m256i_private r_; - simde__m128i_private - e1_ = simde__m128i_to_private(e1), - e0_ = simde__m128i_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i_private[0] = e0_; - r_.m128i_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128i - #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi8(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi8(a); - r_.m128i[1] = simde_mm_set1_epi8(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - #endif - - return 
simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi8 - #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi16 (int16_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi16(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi16(a); - r_.m128i[1] = simde_mm_set1_epi16(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi16 - #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi32 (int32_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi32(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi32(a); - r_.m128i[1] = simde_mm_set1_epi32(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi32 - #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi64x (int64_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi64x(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi64x(a); - r_.m128i[1] = simde_mm_set1_epi64x(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi64x - #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set1_ps (simde_float32 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_ps(a); - #else - simde__m256_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_set1_ps(a); - r_.m128[1] = simde_mm_set1_ps(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_ps - #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_pd(a); - #else - simde__m256d_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_set1_pd(a); - r_.m128d[1] = simde_mm_set1_pd(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_pd - #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i16[i] = a_.i16[2 * i]; - r_.i16[i + quarter_point] = b_.i16[2 * i]; - r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; - r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i16[i] = a_.i16[2 * i + 1]; - r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; - r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; - r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i32[i] = a_.i32[2 * i]; - r_.i32[i + quarter_point] = b_.i32[2 * i]; - r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; - r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - const size_t quarter_point = (sizeof(r_.i32) / 
sizeof(r_.i32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i32[i] = a_.i32[2 * i + 1]; - r_.i32[i + quarter_point] = b_.i32[2 * i + 1]; - r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; - r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f32[i] = a_.f32[2 * i]; - r_.f32[i + quarter_point] = b_.f32[2 * i]; - r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; - r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f32[i] = a_.f32[2 * i + 1]; - r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; - r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; - r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f64[i] = a_.f64[2 * i]; - r_.f64[i + quarter_point] = b_.f64[2 * i]; - r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; - r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f64[i] = a_.f64[2 * i + 1]; - r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; - r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; - r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_abs_ps(simde__m256 a) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_abs_pd(simde__m256d a) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_add_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_add_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_ps - #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hadd_ps(a, b); - #else - return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_ps - #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_add_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_add_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] + b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_pd - #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hadd_pd(a, b); - #else - return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_pd - #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_addsub_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; - r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_addsub_ps - #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_addsub_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; - r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_addsub_pd - #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_and_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_and_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_ps - #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_and_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_and_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; 
i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_pd - #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_andnot_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_ps - #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_andnot_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_pd - #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ - simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_ps - #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; - } - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_pd(a, b, imm8) \ - simde_mm256_set_m128d( \ - simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ - simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_pd - #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_ps(a, b, mask); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - mask_ = simde__m256_to_private(mask); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); - r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blendv_ps - #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_pd(a, b, mask); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - mask_ = simde__m256d_to_private(mask); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); - r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blendv_pd - #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_pd(mem_addr); - #else - simde__m256d_private r_; - - simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); - r_.m128d[0] = tmp; - r_.m128d[1] = tmp; - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_pd - #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_ps(mem_addr); - #else - simde__m256_private r_; - - simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); - r_.m128[0] = tmp; - r_.m128[1] = tmp; - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_ps - #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcast_sd (simde_float64 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_sd(a); - #else - return simde_mm256_set1_pd(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_sd - #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_broadcast_ss (simde_float32 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_broadcast_ss(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); - #else - return simde_mm_set1_ps(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcast_ss - #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcast_ss (simde_float32 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_ss(a); - #else - return simde_mm256_set1_ps(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_ss - #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castpd128_pd256 (simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd128_pd256(a); - #else - simde__m256d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - r_.m128d_private[0] = a_; - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd128_pd256 - #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm256_castpd256_pd128 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd256_pd128(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - return a_.m128d[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd256_pd128 - #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castps128_ps256 (simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps128_ps256(a); - #else - simde__m256_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - r_.m128_private[0] = a_; - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps128_ps256 - #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_castps256_ps128 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps256_ps128(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - return a_.m128[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps256_ps128 - #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castsi128_si256 (simde__m128i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi128_si256(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - r_.m128i_private[0] = a_; - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi128_si256 - #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_castsi256_si128 (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_si128(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_si128 - #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_round_ps (simde__m256 a, const int rounding) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyintf) - case SIMDE_MM_FROUND_CUR_DIRECTION: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_roundf) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_floorf) - case SIMDE_MM_FROUND_TO_NEG_INF: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_ceilf) - case SIMDE_MM_FROUND_TO_POS_INF: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_truncf) - case SIMDE_MM_FROUND_TO_ZERO: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256_private \ - 
simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ - simde_mm256_round_ps_a_ = simde__m256_to_private(a); \ - \ - for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \ - simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \ - } \ - \ - simde__m256_from_private(simde_mm256_round_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_round_ps - #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_round_pd (simde__m256d a, const int rounding) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyint) - case SIMDE_MM_FROUND_CUR_DIRECTION: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_nearbyint(a_.f64[i]); - } - break; - #endif - - #if defined(simde_math_round) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_round(a_.f64[i]); - } - break; - #endif - - #if defined(simde_math_floor) - case SIMDE_MM_FROUND_TO_NEG_INF: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_floor(a_.f64[i]); - } - break; - #endif - - #if defined(simde_math_ceil) - case SIMDE_MM_FROUND_TO_POS_INF: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_ceil(a_.f64[i]); - } - break; - #endif - - #if defined(simde_math_trunc) - case SIMDE_MM_FROUND_TO_ZERO: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_trunc(a_.f64[i]); - } - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256d_private \ - simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ - simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \ - \ - for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \ - simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \ - } \ - \ - simde__m256d_from_private(simde_mm256_round_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_round_pd - #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_ceil_pd (simde__m256d a) { - return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_ceil_pd - #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_ceil_ps (simde__m256 a) { - return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef 
_mm256_ceil_ps - #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL - -/* This implementation does not support signaling NaNs (yet?) */ -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - switch (imm8) { - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b)); - break; - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - return simde_mm_cmpeq_pd(a, b); - break; - case SIMDE_CMP_NGE_US: - case SIMDE_CMP_NGE_UQ: - return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b)); - break; - case SIMDE_CMP_LT_OS: - case SIMDE_CMP_LT_OQ: - return simde_mm_cmplt_pd(a, b); - break; - case SIMDE_CMP_NGT_US: - case SIMDE_CMP_NGT_UQ: - return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b)); - break; - case SIMDE_CMP_LE_OS: - case SIMDE_CMP_LE_OQ: - return simde_mm_cmple_pd(a, b); - break; - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - return simde_mm_cmpneq_pd(a, b); - break; - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b)); - break; - case SIMDE_CMP_NLT_US: - case SIMDE_CMP_NLT_UQ: - return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b)); - break; - case SIMDE_CMP_GE_OS: - case SIMDE_CMP_GE_OQ: - return simde_mm_cmpge_pd(a, b); - break; - case SIMDE_CMP_NLE_US: - case SIMDE_CMP_NLE_UQ: - return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b)); - break; - case SIMDE_CMP_GT_OS: - case SIMDE_CMP_GT_OQ: - return simde_mm_cmpgt_pd(a, b); - break; - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - return simde_mm_setzero_pd(); - break; - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - return simde_x_mm_setone_pd(); - break; - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - return simde_mm_cmpunord_pd(a, b); - break; - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - return simde_mm_cmpord_pd(a, b); - break; - } - - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); -} -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \ - simde__m128d simde_mm_cmp_pd_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \ - break; \ - default: \ - simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \ - break; \ - } \ - simde_mm_cmp_pd_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_pd - #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - switch (imm8) { - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b)); - break; - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - return simde_mm_cmpeq_ps(a, b); - break; - case SIMDE_CMP_NGE_US: - case SIMDE_CMP_NGE_UQ: - return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b)); - break; - case SIMDE_CMP_LT_OS: - case SIMDE_CMP_LT_OQ: - return simde_mm_cmplt_ps(a, b); - break; - case SIMDE_CMP_NGT_US: - case SIMDE_CMP_NGT_UQ: - return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b)); - 
break; - case SIMDE_CMP_LE_OS: - case SIMDE_CMP_LE_OQ: - return simde_mm_cmple_ps(a, b); - break; - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - return simde_mm_cmpneq_ps(a, b); - break; - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); - break; - case SIMDE_CMP_NLT_US: - case SIMDE_CMP_NLT_UQ: - return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); - break; - case SIMDE_CMP_GE_OS: - case SIMDE_CMP_GE_OQ: - return simde_mm_cmpge_ps(a, b); - break; - case SIMDE_CMP_NLE_US: - case SIMDE_CMP_NLE_UQ: - return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); - break; - case SIMDE_CMP_GT_OS: - case SIMDE_CMP_GT_OQ: - return simde_mm_cmpgt_ps(a, b); - break; - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - return simde_mm_setzero_ps(); - break; - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - return simde_x_mm_setone_ps(); - break; - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - return simde_mm_cmpunord_ps(a, b); - break; - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - return simde_mm_cmpord_ps(a, b); - break; - } - - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); -} -/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false - * comparisons, but only when AVX-512 is enabled. */ -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ - simde__m128 simde_mm_cmp_ps_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ - break; \ - default: \ - simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ - break; \ - } \ - simde_mm_cmp_ps_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_ps - #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - a_.i64[0] = INT64_C(0); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - a_.i64[0] = ~INT64_C(0); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m128d_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_sd - #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - a_.i32[0] = INT32_C(0); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - a_.i32[0] = ~INT32_C(0); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m128_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_ss - #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m256d -#if defined(__clang__) && defined(__AVX512DQ__) -simde_mm256_cmp_pd_internal_ -#else -simde_mm256_cmp_pd -#endif -(simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m256d_from_private(r_); -} -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ - simde__m256d simde_mm256_cmp_pd_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ - break; \ - default: \ - simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ - break; \ - } \ - simde_mm256_cmp_pd_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_pd - #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m256 -#if defined(__clang__) && defined(__AVX512DQ__) -simde_mm256_cmp_ps_internal_ -#else -simde_mm256_cmp_ps -#endif -(simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m256_to_private(simde_mm256_setzero_ps()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m256_from_private(r_); -} -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ - simde__m256 simde_mm256_cmp_ps_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ - break; \ - default: \ - simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ - break; \ - } \ - simde_mm256_cmp_ps_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256_private \ - simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ - simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ - simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ - \ - for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ - simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ - } \ - \ - simde__m256_from_private(simde_mm256_cmp_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_ps - #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { - simde__m256_private - r_, - dest_ = simde__m256_to_private(dest), - src_ = simde__m256_to_private(src); - - #if defined(simde_math_copysignf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #else - simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); - return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { - simde__m256d_private - r_, - dest_ = simde__m256d_to_private(dest), - src_ = simde__m256d_to_private(src); - - #if defined(simde_math_copysign) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); - } - #else - simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); - return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); - #endif - - return simde__m256d_from_private(r_); -} - -HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m256d -simde_mm256_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtepi32_pd(a); - #else - simde__m256d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi32_pd - #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 - simde_mm256_cvtepi32_ps (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtepi32_ps(a); - #else - simde__m256_private r_; - simde__m256i_private a_ = simde__m256i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi32_ps - #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_cvtpd_epi32 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtpd_epi32(a); - #else - simde__m128i_private r_; - simde__m256d_private a_ = simde__m256d_to_private(a); - - #if defined(simde_math_nearbyint) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtpd_epi32 - #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_cvtpd_ps (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m256d_private a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtpd_ps - #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtps_epi32 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtps_epi32(a); - #else - simde__m256i_private r_; - simde__m256_private a_ = simde__m256_to_private(a); - - #if defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtps_epi32 - #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtps_pd(a); - #else - simde__m256d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); - } - - return 
simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtps_pd - #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm256_cvtsd_f64 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MSVC_VERSION_CHECK(19,14,0)) - return _mm256_cvtsd_f64(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - return a_.f64[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtsd_f64 - #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm256_cvtsi256_si32 (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MSVC_VERSION_CHECK(19,14,0)) - return _mm256_cvtsi256_si32(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i32[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtsi256_si32 - #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm256_cvtss_f32 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MSVC_VERSION_CHECK(19,14,0)) - return _mm256_cvtss_f32(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - return a_.f32[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtss_f32 - #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_cvttpd_epi32 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvttpd_epi32(a); - #else - simde__m128i_private r_; - simde__m256d_private a_ = simde__m256d_to_private(a); - - #if defined(simde_math_trunc) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvttpd_epi32 - #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvttps_epi32 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvttps_epi32(a); - #else - simde__m256i_private r_; - simde__m256_private a_ = simde__m256_to_private(a); - - #if defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvttps_epi32 - #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_div_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_ps - #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_div_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_pd - #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm256_extractf128_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256d_private a_ = simde__m256d_to_private(a); - return a_.m128d[imm8]; -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf128_pd - #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_extractf128_ps (simde__m256 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256_private a_ = simde__m256_to_private(a); - return a_.m128[imm8]; -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf128_ps - #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[imm8]; -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf128_si256 - #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_floor_pd (simde__m256d a) { - return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_floor_pd - #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_floor_ps (simde__m256 a) { - return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_floor_ps - #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 31) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i8[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi8 - #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 15) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i16[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi16 - #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 7) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i32[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi32 - #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 3) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i64[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm256_insert_epi64 - #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256d_private a_ = simde__m256d_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - a_.m128d_private[imm8] = b_; - - return simde__m256d_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_pd - #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256_private a_ = simde__m256_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - 
a_.m128_private[imm8] = b_; - - return simde__m256_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_ps - #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - a_.m128i_private[imm8] = b_; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_si256 - #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) -#else -# define simde_mm256_dp_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ - simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_dp_ps - #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm256_extract_epi32 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 7) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i32[index]; -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi32 - #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm256_extract_epi64 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 3) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i64[index]; -} -#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) - #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) - #endif -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm256_extract_epi64 - #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_lddqu_si256 - #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_pd(mem_addr); - #else - simde__m256d r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); - 
return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_pd - #define _mm256_load_pd(a) simde_mm256_load_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_ps(mem_addr); - #else - simde__m256 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_ps - #define _mm256_load_ps(a) simde_mm256_load_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_load_si256 (simde__m256i const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_si256(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_si256 - #define _mm256_load_si256(a) simde_mm256_load_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_pd(a); - #else - simde__m256d r; - simde_memcpy(&r, a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_pd - #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_ps(a); - #else - simde__m256 r; - simde_memcpy(&r, a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_ps - #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi8 - #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) -#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi16 - #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi32(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi32 - #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi64 - #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_si256 (void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_si256 - #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - return _mm256_loadu2_m128(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), - simde_mm_loadu_ps(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128 - #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
- return _mm256_loadu2_m128d(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), - simde_mm_loadu_pd(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128d - #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - return _mm256_loadu2_m128i(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), - simde_mm_loadu_si128(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128i - #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); - #else - return _mm_maskload_pd(mem_addr, mask); - #endif - #else - simde__m128d_private r_; - simde__m128i_private - mask_ = simde__m128i_to_private(mask), - mask_shr_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde_mm_and_pd(simde_mm_load_pd(mem_addr), - simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { - mask_shr_.i64[i] = mask_.i64[i] >> 63; - } - #endif - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskload_pd - #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); - #else - return _mm256_maskload_pd(mem_addr, mask); - #endif - #else - simde__m256d_private r_; - simde__m256i_private mask_ = simde__m256i_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskload_pd - #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); - #else - return _mm_maskload_ps(mem_addr, mask); - #endif - #else - simde__m128_private r_; - simde__m128i_private - mask_ = simde__m128i_to_private(mask), - mask_shr_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde_mm_and_ps(simde_mm_load_ps(mem_addr), - simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { - mask_shr_.i32[i] = mask_.i32[i] >> 31; - } - #endif - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskload_ps - #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); - #else - return _mm256_maskload_ps(mem_addr, mask); - #endif - #else - simde__m256_private r_; - simde__m256i_private mask_ = simde__m256i_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskload_ps - #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); - #else - _mm_maskstore_pd(mem_addr, mask, a); - #endif - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) - mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) - mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if (mask_.u64[i] >> 63) - mem_addr[i] = a_.f64[i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_pd - #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); - #else - _mm256_maskstore_pd(mem_addr, mask, a); - #endif - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256d_private a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if (mask_.u64[i] & (UINT64_C(1) << 63)) - mem_addr[i] = a_.f64[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_pd - #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); - #else - _mm_maskstore_ps(mem_addr, mask, a); - #endif - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) - mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) - mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) - mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) - mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.f32[i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_ps - #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); - #else - _mm256_maskstore_ps(mem_addr, mask, a); - #endif - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256_private a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.f32[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_ps - #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_min_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_min_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_ps - #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_min_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_min_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_pd - #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_max_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_max_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_ps - #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_max_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_max_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_pd - #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_movedup_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movedup_pd(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movedup_pd - #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_movehdup_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movehdup_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movehdup_ps - #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_moveldup_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_moveldup_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); - #else - SIMDE_VECTORIZE 
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_moveldup_ps - #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_ps(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r |= (a_.u32[i] >> 31) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_ps - #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_pd(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_pd - #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_ps - #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_pd - #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_or_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; 
- #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_ps - #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_or_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] | b_.u64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_pd - #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute_ps (simde__m256 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_ps - #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_pd - #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permute_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permute_ps - #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permute_pd (simde__m128d a, const int imm8) - 
[... bulk deletion of the vendored SIMDe compatibility headers continues: the AVX polyfills from simde/x86/avx.h (permute/permutevar, permute2f128, rcp/rsqrt/sqrt, setr_*, shuffle, store/storeu/stream, sub/hsub, undefined_*, xor/negate, unpackhi/unpacklo, zext*, testc/testz/testnzc) and the opening AVX2 polyfills (abs, add/adds/hadd, avg, blend/blendv, broadcast) are removed verbatim along with the rest of the bundled headers ...]
simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastb_epi8 - #define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastb_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastb_epi8(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastb_epi8 - #define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastw_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastw_epi16(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastw_epi16 - #define _mm_broadcastw_epi16(a) simde_mm_broadcastw_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastw_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastw_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastw_epi16 - #define _mm256_broadcastw_epi16(a) simde_mm256_broadcastw_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastd_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastd_epi32(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastd_epi32 - #define _mm_broadcastd_epi32(a) simde_mm_broadcastd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastd_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastd_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastd_epi32 - #define _mm256_broadcastd_epi32(a) simde_mm256_broadcastd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastq_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastq_epi64(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastq_epi64 - #define _mm_broadcastq_epi64(a) simde_mm_broadcastq_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastq_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastq_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastq_epi64 - #define _mm256_broadcastq_epi64(a) simde_mm256_broadcastq_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_broadcastss_ps (simde__m128 a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastss_ps(a); - #elif defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_shuffle_ps(a, a, 0); - #else - simde__m128_private r_; - simde__m128_private a_= simde__m128_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastss_ps - #define _mm_broadcastss_ps(a) simde_mm_broadcastss_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcastss_ps (simde__m128 a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastss_ps(a); - #else - simde__m256_private r_; - simde__m128_private a_= simde__m128_to_private(a); - - #if defined(SIMDE_X86_AVX_NATIVE) - __m128 tmp = _mm_permute_ps(a_.n, 0); - r_.n = _mm256_insertf128_ps(_mm256_castps128_ps256(tmp), tmp, 1); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 0, 0, 0, 0, 0, 0, 0); - #elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) - r_.m128[0] = r_.m128[1] = simde_mm_broadcastss_ps(simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastss_ps - #define _mm256_broadcastss_ps(a) simde_mm256_broadcastss_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_broadcastsd_pd (simde__m128d a) { - return simde_mm_movedup_pd(a); -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastsd_pd - #define _mm_broadcastsd_pd(a) simde_mm_broadcastsd_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcastsd_pd (simde__m128d a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastsd_pd(a); - #else - simde__m256d_private r_; - simde__m128d_private a_= simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[0]; - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastsd_pd - #define _mm256_broadcastsd_pd(a) simde_mm256_broadcastsd_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastsi128_si256 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) 
- return _mm256_broadcastsi128_si256(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i_private[0] = a_; - r_.m128i_private[1] = a_; - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = a_.i64[1]; - r_.i64[2] = a_.i64[0]; - r_.i64[3] = a_.i64[1]; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#define simde_mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastsi128_si256 - #define _mm256_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) - #undef _mm_broadcastsi128_si256 - #define _mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_bslli_epi128 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); - - SIMDE_VECTORIZE - for (int i = 0 ; i < ssize ; i++) { - const int e = i - imm8; - if(i >= (ssize/2)) { - if(e >= (ssize/2) && e < ssize) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - else{ - if(e >= 0 && e < (ssize/2)) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_bslli_epi128(a, imm8) _mm256_bslli_epi128(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_bslli_epi128 - #define _mm256_bslli_epi128(a, imm8) simde_mm256_bslli_epi128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_bsrli_epi128 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); - - SIMDE_VECTORIZE - for (int i = 0 ; i < ssize ; i++) { - const int e = i + imm8; - if(i < (ssize/2)) { - if(e >= 0 && e < (ssize/2)) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - else{ - if(e >= (ssize/2) && e < ssize) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_bsrli_epi128(a, imm8) _mm256_bsrli_epi128(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_bsrli_epi128 - #define _mm256_bsrli_epi128(a, imm8) simde_mm256_bsrli_epi128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi8 - #define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi16 - #define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi32 - #define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi64 - #define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi8 - #define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 > b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi16 - #define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi32 - #define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi64 - #define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi16 - #define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi32 - #define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi64 - #define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi16_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi16_epi32 - #define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi16_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi16_epi64 - #define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi32_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi32_epi64 - #define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi16 - #define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi32 - #define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u8[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi64 - #define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu16_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu16_epi32 - #define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu16_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if 
defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu16_epi64 - #define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu32_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu32_epi64 - #define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_extract_epi8 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 31){ - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i8[index]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi8(a, index) _mm256_extract_epi8(a, index) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi8 - #define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_extract_epi16 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 15) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i16[index]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi16(a, index) _mm256_extract_epi16(a, index) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi16 - #define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_extracti128_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[imm8]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extracti128_si256 - #define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i32gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return 
simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i32gather_epi32(base_addr, vindex, scale) _mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_epi32 - #define _mm_i32gather_epi32(base_addr, vindex, scale) simde_mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i32gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { - r_.i32[i] = src_.i32[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_epi32 - #define _mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_i32gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i32gather_epi32(base_addr, vindex, scale) _mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_epi32 - #define _mm256_i32gather_epi32(base_addr, vindex, scale) simde_mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_i32gather_epi32(simde__m256i src, const int32_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 
&& !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - src_ = simde__m256i_to_private(src), - mask_ = simde__m256i_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { - r_.i32[i] = src_.i32[i]; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i32gather_epi32 - #define _mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i64gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i64gather_epi32(base_addr, vindex, scale) _mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i64gather_epi32 - #define _mm_i64gather_epi32(base_addr, vindex, scale) simde_mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { 
- r_.i32[i] = src_.i32[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i64gather_epi32 - #define _mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_i64gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i64gather_epi32(base_addr, vindex, scale) _mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i64gather_epi32 - #define _mm256_i64gather_epi32(base_addr, vindex, scale) simde_mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m256i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128i_private - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { - r_.i32[i] = src_.i32[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i64gather_epi32 - #define _mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), 
vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_epi64 - #define _mm_i32gather_epi64(base_addr, vindex, scale) simde_mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i32gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_epi64 - #define _mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - 
simde__m256i_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_epi64 - #define _mm256_i32gather_epi64(base_addr, vindex, scale) simde_mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_i32gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m128i vindex, simde__m256i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - src_ = simde__m256i_to_private(src), - mask_ = simde__m256i_to_private(mask), - r_; - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i32gather_epi64 - #define _mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i64gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , 
vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i64gather_epi64 - #define _mm_i64gather_epi64(base_addr, vindex, scale) simde_mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i64gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i64gather_epi64 - #define _mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_i64gather_epi64(const int64_t* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define 
simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i64gather_epi64 - #define _mm256_i64gather_epi64(base_addr, vindex, scale) simde_mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_i64gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - src_ = simde__m256i_to_private(src), - mask_ = simde__m256i_to_private(mask), - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i64gather_epi64 - #define _mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_i32gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i32gather_ps(base_addr, vindex, scale) _mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_ps - #define _mm_i32gather_ps(base_addr, vindex, scale) 
simde_mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_i32gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128_private - src_ = simde__m128_to_private(src), - mask_ = simde__m128_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f32[i] = dst; - } - else { - r_.f32[i] = src_.f32[i]; - } - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_ps - #define _mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_i32gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m256_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i32gather_ps(base_addr, vindex, scale) _mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, (base_addr)), (vindex), (scale)) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_ps - #define _mm256_i32gather_ps(base_addr, vindex, scale) simde_mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, (base_addr)), (vindex), (scale)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_i32gather_ps(simde__m256 src, const simde_float32* base_addr, simde__m256i vindex, simde__m256 mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m256_private - src_ = simde__m256_to_private(src), - mask_ = simde__m256_to_private(mask), - r_; - const uint8_t* addr = 
HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f32[i] = dst; - } - else { - r_.f32[i] = src_.f32[i]; - } - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i32gather_ps - #define _mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_i64gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128_private - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i64gather_ps(base_addr, vindex, scale) _mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i64gather_ps - #define _mm_i64gather_ps(base_addr, vindex, scale) simde_mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128_private - src_ = simde__m128_to_private(src), - mask_ = simde__m128_to_private(mask), - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f32[i] = dst; - } - else { - r_.f32[i] = src_.f32[i]; - } - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) 
_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, float32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i64gather_ps - #define _mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_i64gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128_private - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i64gather_ps(base_addr, vindex, scale) _mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i64gather_ps - #define _mm256_i64gather_ps(base_addr, vindex, scale) simde_mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m256i vindex, simde__m128 mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128_private - src_ = simde__m128_to_private(src), - mask_ = simde__m128_to_private(mask), - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f32[i] = dst; - } - else { - r_.f32[i] = src_.f32[i]; - } - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i64gather_ps - #define _mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) - 
SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128d_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f64[i] = dst; - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i32gather_pd(base_addr, vindex, scale) _mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_pd - #define _mm_i32gather_pd(base_addr, vindex, scale) simde_mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mask_i32gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128d_private - src_ = simde__m128d_to_private(src), - mask_ = simde__m128d_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f64[i] = dst; - } - else { - r_.f64[i] = src_.f64[i]; - } - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_pd - #define _mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m256d_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f64[i] = dst; - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i32gather_pd(base_addr, vindex, scale) 
_mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_pd - #define _mm256_i32gather_pd(base_addr, vindex, scale) simde_mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_i32gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m128i vindex, simde__m256d mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256d_private - src_ = simde__m256d_to_private(src), - mask_ = simde__m256d_to_private(mask), - r_; - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f64[i] = dst; - } - else { - r_.f64[i] = src_.f64[i]; - } - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i32gather_pd - #define _mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_i64gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128d_private - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f64[i] = dst; - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i64gather_pd(base_addr, vindex, scale) _mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i64gather_pd - #define _mm_i64gather_pd(base_addr, vindex, scale) simde_mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mask_i64gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = 
simde__m128i_to_private(vindex); - simde__m128d_private - src_ = simde__m128d_to_private(src), - mask_ = simde__m128d_to_private(mask), - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f64[i] = dst; - } - else { - r_.f64[i] = src_.f64[i]; - } - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i64gather_pd - #define _mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_i64gather_pd(const simde_float64* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m256d_private - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f64[i] = dst; - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i64gather_pd(base_addr, vindex, scale) _mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i64gather_pd - #define _mm256_i64gather_pd(base_addr, vindex, scale) simde_mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_i64gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m256i vindex, simde__m256d mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m256d_private - src_ = simde__m256d_to_private(src), - mask_ = simde__m256d_to_private(mask), - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f64[i] = dst; - } - else { - r_.f64[i] = src_.f64[i]; - } - } - - return 
simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i64gather_pd - #define _mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_inserti128_si256(simde__m256i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - a_.m128i_private[ imm8 & 1 ] = b_; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_inserti128_si256(a, b, imm8) _mm256_inserti128_si256(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_inserti128_si256 - #define _mm256_inserti128_si256(a, b, imm8) simde_mm256_inserti128_si256(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_madd_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_madd_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_madd_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - SIMDE_ALIGN_TO_32 int32_t product SIMDE_VECTOR(64); - SIMDE_ALIGN_TO_32 int32_t a32x16 SIMDE_VECTOR(64); - SIMDE_ALIGN_TO_32 int32_t b32x16 SIMDE_VECTOR(64); - SIMDE_ALIGN_TO_32 int32_t even SIMDE_VECTOR(32); - SIMDE_ALIGN_TO_32 int32_t odd SIMDE_VECTOR(32); - - SIMDE_CONVERT_VECTOR_(a32x16, a_.i16); - SIMDE_CONVERT_VECTOR_(b32x16, b_.i16); - product = a32x16 * b32x16; - - even = __builtin_shufflevector(product, product, 0, 2, 4, 6, 8, 10, 12, 14); - odd = __builtin_shufflevector(product, product, 1, 3, 5, 7, 9, 11, 13, 15); - - r_.i32 = even + odd; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_madd_epi16 - #define _mm256_madd_epi16(a, b) simde_mm256_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maddubs_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_maddubs_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_maddubs_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_maddubs_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); 
- r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_maddubs_epi16 - #define _mm256_maddubs_epi16(a, b) simde_mm256_maddubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_maskload_epi32(mem_addr, mask); - #else - simde__m128i_private - r_, - mask_ = simde__m128i_to_private(mask), - mask_shr_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - mask_shr_.i32[i] = mask_.i32[i] >> 31; - } - #endif - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = mask_shr_.i32[i] ? mem_addr[i] : INT32_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_maskload_epi32 - #define _mm_maskload_epi32(mem_addr, mask) simde_mm_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_maskload_epi32(mem_addr, mask); - #else - simde__m256i_private - mask_ = simde__m256i_to_private(mask), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (mask_.i32[i] >> 31) ? mem_addr[i] : INT32_C(0); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskload_epi32 - #define _mm256_maskload_epi32(mem_addr, mask) simde_mm256_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); - #else - simde__m128i_private - r_, - mask_ = simde__m128i_to_private(mask), - mask_shr_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { - mask_shr_.i64[i] = mask_.i64[i] >> 63; - } - #endif - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = mask_shr_.i64[i] ? 
mem_addr[i] : INT64_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_maskload_epi64 - #define _mm_maskload_epi64(mem_addr, mask) simde_mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); - #else - simde__m256i_private - mask_ = simde__m256i_to_private(mask), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (mask_.i64[i] >> 63) ? mem_addr[i] : INT64_C(0); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskload_epi64 - #define _mm256_maskload_epi64(mem_addr, mask) simde_mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - _mm_maskstore_epi32(mem_addr, mask, a); - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.i32[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_epi32 - #define _mm_maskstore_epi32(mem_addr, mask, a) simde_mm_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - _mm256_maskstore_epi32(mem_addr, mask, a); - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256i_private a_ = simde__m256i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.i32[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_epi32 - #define _mm256_maskstore_epi32(mem_addr, mask, a) simde_mm256_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - _mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - if (mask_.u64[i] >> 63) - mem_addr[i] = a_.i64[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_epi64 - #define _mm_maskstore_epi64(mem_addr, mask, a) simde_mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - 
_mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256i_private a_ = simde__m256i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - if (mask_.u64[i] & (UINT64_C(1) << 63)) - mem_addr[i] = a_.i64[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_epi64 - #define _mm256_maskstore_epi64(mem_addr, mask, a) simde_mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_max_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) - return _mm256_max_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_max_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_max_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_epi8 - #define _mm256_max_epi8(a, b) simde_mm256_max_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_max_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_max_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_max_epu8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_max_epu8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_epu8 - #define _mm256_max_epu8(a, b) simde_mm256_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_max_epu16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_max_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_max_epu16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_max_epu16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? 
a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_epu16 - #define _mm256_max_epu16(a, b) simde_mm256_max_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_max_epu32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_max_epu32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_max_epu32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_max_epu32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_epu32 - #define _mm256_max_epu32(a, b) simde_mm256_max_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_max_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_max_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_max_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_max_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_epi16 - #define _mm256_max_epi16(a, b) simde_mm256_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_max_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_max_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_max_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_max_epi32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_epi32 - #define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_min_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) - return _mm256_min_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_min_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_min_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_epi8 - #define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_min_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_min_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_min_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_min_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_epi16 - #define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_min_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_min_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_min_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_min_epi32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_epi32 - #define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_min_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_min_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_min_epu8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_min_epu8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_epu8 - #define _mm256_min_epu8(a, b) simde_mm256_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_min_epu16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_min_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_min_epu16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_min_epu16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? 
a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_epu16 - #define _mm256_min_epu16(a, b) simde_mm256_min_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_min_epu32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_min_epu32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_min_epu32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_min_epu32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_epu32 - #define _mm256_min_epu32(a, b) simde_mm256_min_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm256_movemask_epi8 (simde__m256i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_movemask_epi8(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - uint32_t r = 0; - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(uint32_t,simde_mm_movemask_epi8(a_.m128i[i])) << (16 * i); - } - #else - r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(uint32_t, (a_.u8[31 - i] >> 7)) << (31 - i); - } - #endif - - return HEDLEY_STATIC_CAST(int32_t, r); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_epi8 - #define _mm256_movemask_epi8(a) simde_mm256_movemask_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mpsadbw_epu8 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - const int a_offset1 = imm8 & 4; - const int b_offset1 = (imm8 & 3) << 2; - const int a_offset2 = (imm8 >> 3) & 4; - const int b_offset2 = ((imm8 >> 3) & 3) << 2; - - #if defined(simde_math_abs) - const int halfway_point = HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0])) ) / 2; - for (int i = 0 ; i < halfway_point ; i++) { - r_.u16[i] = - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 0] - b_.u8[b_offset1 + 0]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 1] - b_.u8[b_offset1 + 1]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 2] - b_.u8[b_offset1 + 2]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 3] - b_.u8[b_offset1 + 3]))); - r_.u16[halfway_point + i] = - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 0] - b_.u8[2 * halfway_point + b_offset2 + 0]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 1] - b_.u8[2 * halfway_point + b_offset2 + 1]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 2] - b_.u8[2 * halfway_point + b_offset2 + 2]))) + - 
HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 3] - b_.u8[2 * halfway_point + b_offset2 + 3]))); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) - #define simde_mm256_mpsadbw_epu8(a, b, imm8) _mm256_mpsadbw_epu8(a, b, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - #define simde_mm256_mpsadbw_epu8(a, b, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8 >> 3)), \ - simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mpsadbw_epu8 - #define _mm256_mpsadbw_epu8(a, b, imm8) simde_mm256_mpsadbw_epu8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mul_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mul_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_mul_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_mul_epi32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = - HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * - HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_mul_epi32(a, b) simde_mm256_mul_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mul_epu32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mul_epu32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_mul_epu32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_mul_epu32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_mul_epu32(a, b) simde_mm256_mul_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mulhi_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mulhi_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_mulhi_epi16(a, b) simde_mm256_mulhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mulhi_epu16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mulhi_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = 
simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_mulhi_epu16(a, b) simde_mm256_mulhi_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mulhrs_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mulhrs_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_mulhrs_epi16(a, b) simde_mm256_mulhrs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mullo_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mullo_epi16(a, b); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mullo_epi16 - #define _mm256_mullo_epi16(a, b) simde_mm256_mullo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mullo_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mullo_epi32(a, b); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mullo_epi32 - #define _mm256_mullo_epi32(a, b) simde_mm256_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_mullo_epu32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 * b_.u32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] * b_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_or_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_or_si256(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_or_si128(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_or_si128(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) 
; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_si256 - #define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_packs_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_packs_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_packs_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_packs_epi16(a_.m128i[1], b_.m128i[1]); - #else - const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/2; - const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/4; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); - r_.i8[i + quarter_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i])); - r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i])); - r_.i8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i])); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_packs_epi16 - #define _mm256_packs_epi16(a, b) simde_mm256_packs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_packs_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_packs_epi32(a, b); - #else - simde__m256i_private - r_, - v_[] = { - simde__m256i_to_private(a), - simde__m256i_to_private(b) - }; - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_packs_epi32(v_[0].m128i[0], v_[1].m128i[0]); - r_.m128i[1] = simde_mm_packs_epi32(v_[0].m128i[1], v_[1].m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int32_t v = v_[(i >> 2) & 1].i32[(i & 11) - ((i & 8) >> 1)]; - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (v > INT16_MAX) ? INT16_MAX : ((v < INT16_MIN) ? INT16_MIN : v)); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_packs_epi32 - #define _mm256_packs_epi32(a, b) simde_mm256_packs_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_packus_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_packus_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_packus_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_packus_epi16(a_.m128i[1], b_.m128i[1]); - #else - const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; - const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? 
UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i])); - r_.u8[i + quarter_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i])); - r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i])); - r_.u8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i])); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_packus_epi16 - #define _mm256_packus_epi16(a, b) simde_mm256_packus_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_packus_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_packus_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_packus_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_packus_epi32(a_.m128i[1], b_.m128i[1]); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i])); - r_.u16[i + quarter_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i])); - r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i])); - r_.u16[halfway_point + i + quarter_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + i])); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_packus_epi32 - #define _mm256_packus_epi32(a, b) simde_mm256_packus_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); - r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2x128_si256 - #define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permute4x64_epi64 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - r_.i64[0] = (imm8 & 0x02) ? a_.i64[((imm8 ) & 1)+2] : a_.i64[(imm8 ) & 1]; - r_.i64[1] = (imm8 & 0x08) ? a_.i64[((imm8 >> 2 ) & 1)+2] : a_.i64[(imm8 >> 2 ) & 1]; - r_.i64[2] = (imm8 & 0x20) ? a_.i64[((imm8 >> 4 ) & 1)+2] : a_.i64[(imm8 >> 4 ) & 1]; - r_.i64[3] = (imm8 & 0x80) ? a_.i64[((imm8 >> 6 ) & 1)+2] : a_.i64[(imm8 >> 6 ) & 1]; - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_permute4x64_epi64(a, imm8) _mm256_permute4x64_epi64(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute4x64_epi64 - #define _mm256_permute4x64_epi64(a, imm8) simde_mm256_permute4x64_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute4x64_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - r_.f64[0] = (imm8 & 0x02) ? a_.f64[((imm8 ) & 1)+2] : a_.f64[(imm8 ) & 1]; - r_.f64[1] = (imm8 & 0x08) ? a_.f64[((imm8 >> 2 ) & 1)+2] : a_.f64[(imm8 >> 2 ) & 1]; - r_.f64[2] = (imm8 & 0x20) ? a_.f64[((imm8 >> 4 ) & 1)+2] : a_.f64[(imm8 >> 4 ) & 1]; - r_.f64[3] = (imm8 & 0x80) ? 
a_.f64[((imm8 >> 6 ) & 1)+2] : a_.f64[(imm8 >> 6 ) & 1]; - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_permute4x64_pd(a, imm8) _mm256_permute4x64_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute4x64_pd - #define _mm256_permute4x64_pd(a, imm8) simde_mm256_permute4x64_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutevar8x32_epi32 (simde__m256i a, simde__m256i idx) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_permutevar8x32_epi32(a, idx); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - idx_ = simde__m256i_to_private(idx); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[idx_.i32[i] & 7]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar8x32_epi32 - #define _mm256_permutevar8x32_epi32(a, idx) simde_mm256_permutevar8x32_epi32(a, idx) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permutevar8x32_ps (simde__m256 a, simde__m256i idx) { - #if defined(SIMDE_X86_AVX2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm256_permutevar8x32_ps(a, HEDLEY_REINTERPRET_CAST(simde__m256, idx)); - #else - return _mm256_permutevar8x32_ps(a, idx); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - simde__m256i_private - idx_ = simde__m256i_to_private(idx); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[idx_.i32[i] & 7]; - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar8x32_ps - #define _mm256_permutevar8x32_ps(a, idx) simde_mm256_permutevar8x32_ps(a, idx) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sad_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sad_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sad_epu8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sad_epu8(a_.m128i[1], b_.m128i[1]); - #else - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - uint16_t tmp = 0; - SIMDE_VECTORIZE_REDUCTION(+:tmp) - for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 4) ; j++) { - const size_t e = j + (i * 8); - tmp += (a_.u8[e] > b_.u8[e]) ? (a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); - } - r_.i64[i] = tmp; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sad_epu8 - #define _mm256_sad_epu8(a, b) simde_mm256_sad_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_shuffle_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_shuffle_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; i++) { - r_.u8[ i ] = (b_.u8[ i ] & 0x80) ? 
0 : a_.u8[(b_.u8[ i ] & 0x0f) ]; - r_.u8[i + 16] = (b_.u8[i + 16] & 0x80) ? 0 : a_.u8[(b_.u8[i + 16] & 0x0f) + 16]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_epi8 - #define _mm256_shuffle_epi8(a, b) simde_mm256_shuffle_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_shuffle_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[i + 4] = a_.i32[((imm8 >> (i * 2)) & 3) + 4]; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_shuffle_epi32(a, imm8) _mm256_shuffle_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) -# define simde_mm256_shuffle_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm256_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ - simde__m256i_from_private((simde__m256i_private) { .i32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 32, \ - (simde_tmp_a_).i32, \ - (simde_tmp_a_).i32, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_epi32 - #define _mm256_shuffle_epi32(a, imm8) simde_mm256_shuffle_epi32(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_shufflehi_epi16(a, imm8) _mm256_shufflehi_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_shufflehi_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm256_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ - simde__m256i_from_private((simde__m256i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 32, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4, \ - 8, 9, 10, 11, \ - ((((imm8) ) & 3) + 8 + 4), \ - ((((imm8) >> 2) & 3) + 8 + 4), \ - ((((imm8) >> 4) & 3) + 8 + 4), \ - ((((imm8) >> 6) & 3) + 8 + 4) \ - ) }); })) -#else -# define simde_mm256_shufflehi_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ - simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_shufflehi_epi16 - #define _mm256_shufflehi_epi16(a, imm8) simde_mm256_shufflehi_epi16(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_shufflelo_epi16(a, imm8) _mm256_shufflelo_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define 
simde_mm256_shufflelo_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm256_shufflelo_epi16(a, imm8) (__extension__ ({ \ - const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ - simde__m256i_from_private((simde__m256i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 32, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), \ - 4, 5, 6, 7, \ - ((((imm8) ) & 3) + 8), \ - ((((imm8) >> 2) & 3) + 8), \ - ((((imm8) >> 4) & 3) + 8), \ - ((((imm8) >> 6) & 3) + 8), \ - 12, 13, 14, 15) }); })) -#else -# define simde_mm256_shufflelo_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ - simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_shufflelo_epi16 - #define _mm256_shufflelo_epi16(a, imm8) simde_mm256_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sign_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sign_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] == INT8_C(0)) ? INT8_C(0) : (b_.i8[i] < INT8_C(0)) ? -a_.i8[i] : a_.i8[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sign_epi8 - #define _mm256_sign_epi8(a, b) simde_mm256_sign_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sign_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sign_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] == INT16_C(0)) ? INT16_C(0) : (b_.i16[i] < INT16_C(0)) ? -a_.i16[i] : a_.i16[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sign_epi16 - #define _mm256_sign_epi16(a, b) simde_mm256_sign_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sign_epi32(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sign_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (b_.i32[i] == INT32_C(0)) ? INT32_C(0) : (b_.i32[i] < INT32_C(0)) ? 
-a_.i32[i] : a_.i32[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sign_epi32 - #define _mm256_sign_epi32(a, b) simde_mm256_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sll_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sll_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sll_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sll_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 15) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift)); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sll_epi16 - #define _mm256_sll_epi16(a, count) simde_mm256_sll_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sll_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sll_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sll_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sll_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 31) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift)); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sll_epi32 - #define _mm256_sll_epi32(a, count) simde_mm256_sll_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sll_epi64 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sll_epi64(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sll_epi64(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sll_epi64(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 63) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift)); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sll_epi64 - #define _mm256_sll_epi64(a, count) simde_mm256_sll_epi64(a, count) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_epi16 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* Note: There is no consistency in how compilers handle values outside of - the expected range, hence the discrepancy between what we allow and what - Intel specifies. Some compilers will return 0, others seem to just mask - off everything outside of the range. */ - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_i16) / sizeof(a_.altivec_i16[0])) ; i++) { - r_.altivec_i16[i] = vec_sl(a_.altivec_i16[i], sv); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)); - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_slli_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_epi16 - #define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_i32) / sizeof(a_.altivec_i32[0])) ; i++) { - r_.altivec_i32[i] = vec_sl(a_.altivec_i32[i], sv); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_epi32(a, imm8) _mm256_slli_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_slli_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_epi32 - #define _mm256_slli_epi32(a, imm8) simde_mm256_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_epi64 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8) -#elif 
SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_slli_epi64(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_epi64 - #define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { - const int e = HEDLEY_STATIC_CAST(int, i) - imm8; - r_.m128i_private[h].i8[i] = (e >= 0) ? a_.m128i_private[h].i8[e] : 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_si256(a, imm8) _mm256_slli_si256(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) -# define simde_mm256_slli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm256_slli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_si256 - #define _mm256_slli_si256(a, imm8) simde_mm256_slli_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sllv_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vreinterpretq_s32_u32(b_.neon_u32)); - r_.neon_u32 = vandq_u32(r_.neon_u32, vcltq_u32(b_.neon_u32, vdupq_n_u32(32))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < UINT32_C(32))) & (a_.u32 << b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] << b_.u32[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_sllv_epi32(a, b) _mm_sllv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_sllv_epi32 - #define _mm_sllv_epi32(a, b) simde_mm_sllv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sllv_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sllv_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sllv_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 << b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] << b_.u32[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_sllv_epi32(a, b) _mm256_sllv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sllv_epi32 - #define _mm256_sllv_epi32(a, b) simde_mm256_sllv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sllv_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vreinterpretq_s64_u64(b_.neon_u64)); - r_.neon_u64 = vandq_u64(r_.neon_u64, vcltq_u64(b_.neon_u64, vdupq_n_u64(64))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] << b_.u64[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_sllv_epi64(a, b) _mm_sllv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_sllv_epi64 - #define _mm_sllv_epi64(a, b) simde_mm_sllv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sllv_epi64 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sllv_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sllv_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] << b_.u64[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_sllv_epi64(a, b) _mm256_sllv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sllv_epi64 - #define _mm256_sllv_epi64(a, b) simde_mm256_sllv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sra_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sra_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sra_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sra_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sra_epi16 - #define _mm256_sra_epi16(a, count) simde_mm256_sra_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sra_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sra_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sra_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sra_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - - if (shift > 31) shift = 31; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sra_epi32 - #define _mm256_sra_epi32(a, count) simde_mm256_sra_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srai_epi16 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srai_epi16(a, imm8) _mm256_srai_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srai_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srai_epi16 - #define _mm256_srai_epi16(a, imm8) simde_mm256_srai_epi16(a, imm8) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srai_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); - - if (shift > 31) shift = 31; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srai_epi32(a, imm8) _mm256_srai_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srai_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srai_epi32 - #define _mm256_srai_epi32(a, imm8) simde_mm256_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srav_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_srav_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t cnt = vreinterpretq_s32_u32(vminq_u32(count_.neon_u32, vdupq_n_u32(31))); - r_.neon_i32 = vshlq_s32(a_.neon_i32, vnegq_s32(cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); - r_.i32[i] = a_.i32[i] >> HEDLEY_STATIC_CAST(int, shift > 31 ? 31 : shift); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_srav_epi32 - #define _mm_srav_epi32(a, count) simde_mm_srav_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srav_epi32 (simde__m256i a, simde__m256i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srav_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - count_ = simde__m256i_to_private(count); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srav_epi32(a_.m128i[0], count_.m128i[0]); - r_.m128i[1] = simde_mm_srav_epi32(a_.m128i[1], count_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); - if (shift > 31) shift = 31; - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srav_epi32 - #define _mm256_srav_epi32(a, count) simde_mm256_srav_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 16 ? 
16 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi16 - #define _mm256_srl_epi16(a, count) simde_mm256_srl_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 32 ? 32 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi32 - #define _mm256_srl_epi32(a, count) simde_mm256_srl_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi64 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi64(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi64(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi64(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 64 ? 
64 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(64, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi64 - #define _mm256_srl_epi64(a, count) simde_mm256_srl_epi64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_epi16 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - if (imm8 > 15) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_u16) / sizeof(a_.altivec_u16[0])) ; i++) { - r_.altivec_u16[i] = vec_sr(a_.altivec_u16[i], sv); - } - #else - if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) { - simde_memset(&r_, 0, sizeof(r_)); - } else { - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> imm8; - } - #endif - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_epi16(a, imm8) _mm256_srli_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srli_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_epi16 - #define _mm256_srli_epi16(a, imm8) simde_mm256_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_u32) / sizeof(a_.altivec_u32[0])) ; i++) { - r_.altivec_u32[i] = vec_sr(a_.altivec_u32[i], sv); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> imm8; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srli_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_epi32 - #define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_epi64 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - 
r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, imm8); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srli_epi64(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_epi64 - #define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { - const int e = imm8 + HEDLEY_STATIC_CAST(int, i); - r_.m128i_private[h].i8[i] = (e < 16) ? a_.m128i_private[h].i8[e] : 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) -# define simde_mm256_srli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm256_srli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_si256 - #define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srlv_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_srlv_epi32(a, b) _mm_srlv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_srlv_epi32 - #define _mm_srlv_epi32(a, b) simde_mm_srlv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srlv_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] >> b_.u32[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_srlv_epi32(a, b) _mm256_srlv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srlv_epi32 - #define _mm256_srlv_epi32(a, b) simde_mm256_srlv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srlv_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_srlv_epi64(a, b) _mm_srlv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_srlv_epi64 - #define _mm_srlv_epi64(a, b) simde_mm_srlv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srlv_epi64 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_srlv_epi64(a, b) _mm256_srlv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srlv_epi64 - #define _mm256_srlv_epi64(a, b) simde_mm256_srlv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_stream_load_si256 (const simde__m256i* mem_addr) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_stream_load_si256(HEDLEY_CONST_CAST(simde__m256i*, mem_addr)); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - return __builtin_nontemporal_load(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_stream_load_si256(mem_addr) simde_mm256_stream_load_si256(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi8 - #define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) { - #if 
defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi16 - #define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hsub_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hsub_epi16(a, b); - #else - return simde_mm256_sub_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_epi16 - #define _mm256_hsub_epi16(a, b) simde_mm256_hsub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi32 - #define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hsub_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hsub_epi32(a, b); - #else - return simde_mm256_sub_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_epi32 - #define _mm256_hsub_epi32(a, b) simde_mm256_hsub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi64 - #define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_sub_epu32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ 
= simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_sub_epu32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_sub_epu32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_subs_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epi8 - #define _mm256_subs_epi8(a, b) simde_mm256_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epi16(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_subs_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epi16 - #define _mm256_subs_epi16(a, b) simde_mm256_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hsubs_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hsubs_epi16(a, b); - #else - return simde_mm256_subs_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsubs_epi16 - #define _mm256_hsubs_epi16(a, b) simde_mm256_hsubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_subs_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epu8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epu8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epu8 - #define _mm256_subs_epu8(a, b) simde_mm256_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epu16(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - 
return _mm256_subs_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epu16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epu16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epu16 - #define _mm256_subs_epu16(a, b) simde_mm256_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_x_mm256_test_all_ones (simde__m256i a) { - simde__m256i_private a_ = simde__m256i_to_private(a); - int r; - int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(&:r_) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r_ &= a_.i32f[i]; - } - - r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); - - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, - 0, 32, 1, 33, 2, 34, 3, 35, - 4, 36, 5, 37, 6, 38, 7, 39, - 16, 48, 17, 49, 18, 50, 19, 51, - 20, 52, 21, 53, 22, 54, 23, 55); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { - r_.i8[2 * i] = a_.i8[i + ~(~i | 7)]; - r_.i8[2 * i + 1] = b_.i8[i + ~(~i | 7)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi8 - #define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, - 0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { - r_.i16[2 * i] = a_.i16[i + ~(~i | 3)]; - r_.i16[2 * i + 1] = b_.i16[i + ~(~i | 3)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi16 - #define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if 
SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, - 0, 8, 1, 9, 4, 12, 5, 13); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { - r_.i32[2 * i] = a_.i32[i + ~(~i | 1)]; - r_.i32[2 * i + 1] = b_.i32[i + ~(~i | 1)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi32 - #define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 0, 4, 2, 6); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { - r_.i64[2 * i] = a_.i64[2 * i]; - r_.i64[2 * i + 1] = b_.i64[2 * i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi64 - #define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, - 8, 40, 9, 41, 10, 42, 11, 43, - 12, 44, 13, 45, 14, 46, 15, 47, - 24, 56, 25, 57, 26, 58, 27, 59, - 28, 60, 29, 61, 30, 62, 31, 63); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { - r_.i8[2 * i] = a_.i8[i + 8 + ~(~i | 7)]; - r_.i8[2 * i + 1] = b_.i8[i + 8 + ~(~i | 7)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_epi8 - #define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, - 4, 20, 5, 21, 6, 22, 7, 23, - 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { - r_.i16[2 * i] = a_.i16[i 
+ 4 + ~(~i | 3)]; - r_.i16[2 * i + 1] = b_.i16[i + 4 + ~(~i | 3)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_epi16 - #define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, - 2, 10, 3, 11, 6, 14, 7, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { - r_.i32[2 * i] = a_.i32[i + 2 + ~(~i | 1)]; - r_.i32[2 * i + 1] = b_.i32[i + 2 + ~(~i | 1)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_epi32 - #define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 1, 5, 3, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { - r_.i64[2 * i] = a_.i64[2 * i + 1]; - r_.i64[2 * i + 1] = b_.i64[2 * i + 1]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_epi64 - #define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_xor_si256(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ b_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_si256 - #define _mm256_xor_si256(a, b) simde_mm256_xor_si256(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX2_H) */ -/* :: End simde/x86/avx2.h :: */ diff --git a/src/simde/x86/avx512.h b/src/simde/x86/avx512.h deleted file mode 100644 index 1706a0f41..000000000 --- a/src/simde/x86/avx512.h +++ /dev/null @@ -1,99366 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 
|| \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_INLINE __inline -#else -# define HEDLEY_INLINE -#endif - -#if defined(HEDLEY_ALWAYS_INLINE) -# undef HEDLEY_ALWAYS_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(always_inline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ALWAYS_INLINE __forceinline -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ - ) -# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") -#else -# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE -#endif - -#if defined(HEDLEY_NEVER_INLINE) -# undef HEDLEY_NEVER_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(noinline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) -# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NEVER_INLINE _Pragma("inline=never") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NEVER_INLINE __attribute((noinline)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#else -# define HEDLEY_NEVER_INLINE -#endif - -#if defined(HEDLEY_PRIVATE) -# undef HEDLEY_PRIVATE -#endif -#if defined(HEDLEY_PUBLIC) -# undef HEDLEY_PUBLIC -#endif -#if defined(HEDLEY_IMPORT) -# undef HEDLEY_IMPORT -#endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC __declspec(dllexport) -# define HEDLEY_IMPORT __declspec(dllimport) -#else -# if \ - HEDLEY_HAS_ATTRIBUTE(visibility) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - ( \ - defined(__TI_EABI__) && \ - ( \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ - ) \ - ) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) -# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) -# else -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC -# endif -# define HEDLEY_IMPORT extern -#endif - -#if defined(HEDLEY_NO_THROW) -# undef HEDLEY_NO_THROW -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nothrow) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_THROW __attribute__((__nothrow__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NO_THROW __declspec(nothrow) -#else -# define HEDLEY_NO_THROW -#endif - -#if defined(HEDLEY_FALL_THROUGH) -# undef HEDLEY_FALL_THROUGH -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_FALL_THROUGH -#elif \ - HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) -#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) -#elif defined(__fallthrough) /* SAL */ -# define HEDLEY_FALL_THROUGH __fallthrough -#else -# define HEDLEY_FALL_THROUGH -#endif - -#if defined(HEDLEY_RETURNS_NON_NULL) -# undef HEDLEY_RETURNS_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) -#elif defined(_Ret_notnull_) /* SAL */ -# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ -#else -# define HEDLEY_RETURNS_NON_NULL -#endif - -#if defined(HEDLEY_ARRAY_PARAM) -# undef HEDLEY_ARRAY_PARAM -#endif -#if \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - !defined(__STDC_NO_VLA__) && \ - !defined(__cplusplus) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_ARRAY_PARAM(name) (name) -#else -# define 
HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(HEDLEY_IS_CONSTANT) -# undef HEDLEY_IS_CONSTANT -#endif -#if defined(HEDLEY_REQUIRE_CONSTEXPR) -# undef HEDLEY_REQUIRE_CONSTEXPR -#endif -/* HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_IS_CONSTEXPR_) -# undef HEDLEY_IS_CONSTEXPR_ -#endif -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -# if \ - HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) -# endif -# elif \ - ( \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(HEDLEY_SUNPRO_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION)) || \ - (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) -# endif -# elif \ - defined(HEDLEY_GCC_VERSION) || \ - defined(HEDLEY_INTEL_VERSION) || \ - defined(HEDLEY_TINYC_VERSION) || \ - defined(HEDLEY_TI_ARMCL_VERSION) || \ - HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ - defined(HEDLEY_TI_CL2000_VERSION) || \ - defined(HEDLEY_TI_CL6X_VERSION) || \ - defined(HEDLEY_TI_CL7X_VERSION) || \ - defined(HEDLEY_TI_CLPRU_VERSION) || \ - defined(__clang__) -# define HEDLEY_IS_CONSTEXPR_(expr) ( \ - sizeof(void) != \ - sizeof(*( \ - 1 ? \ - ((void*) ((expr) * 0L) ) : \ - ((struct { char v[sizeof(void) * 2]; } *) 1) \ - ) \ - ) \ - ) -# endif -#endif -#if defined(HEDLEY_IS_CONSTEXPR_) -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) -#else -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) (0) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(HEDLEY_BEGIN_C_DECLS) -# undef HEDLEY_BEGIN_C_DECLS -#endif -#if defined(HEDLEY_END_C_DECLS) -# undef HEDLEY_END_C_DECLS -#endif -#if defined(HEDLEY_C_DECL) -# undef HEDLEY_C_DECL -#endif -#if defined(__cplusplus) -# define HEDLEY_BEGIN_C_DECLS extern "C" { -# define HEDLEY_END_C_DECLS } -# define HEDLEY_C_DECL extern "C" -#else -# define HEDLEY_BEGIN_C_DECLS -# define HEDLEY_END_C_DECLS -# define HEDLEY_C_DECL -#endif - -#if defined(HEDLEY_STATIC_ASSERT) -# undef HEDLEY_STATIC_ASSERT -#endif -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) -# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#else -# define HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(HEDLEY_NULL) -# undef HEDLEY_NULL -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) -# elif defined(NULL) -# define HEDLEY_NULL NULL -# else -# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) -# endif -#elif defined(NULL) -# define HEDLEY_NULL NULL -#else -# define HEDLEY_NULL ((void*) 0) -#endif - -#if defined(HEDLEY_MESSAGE) -# undef HEDLEY_MESSAGE -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_MESSAGE(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(message msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_WARNING) -# undef HEDLEY_WARNING -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_WARNING(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(clang warning msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_REQUIRE) -# undef HEDLEY_REQUIRE -#endif -#if defined(HEDLEY_REQUIRE_MSG) -# undef HEDLEY_REQUIRE_MSG -#endif -#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) -# if HEDLEY_HAS_WARNING("-Wgcc-compat") -# define HEDLEY_REQUIRE(expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# define HEDLEY_REQUIRE_MSG(expr,msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) -# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) -# endif -#else -# define HEDLEY_REQUIRE(expr) -# define HEDLEY_REQUIRE_MSG(expr,msg) -#endif - -#if defined(HEDLEY_FLAGS) -# undef HEDLEY_FLAGS -#endif -#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) -# define HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else -# define HEDLEY_FLAGS -#endif - -#if defined(HEDLEY_FLAGS_CAST) -# undef HEDLEY_FLAGS_CAST -#endif -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) -# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)") \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(HEDLEY_EMPTY_BASES) -# undef HEDLEY_EMPTY_BASES -#endif -#if \ - (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else -# define HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. */ - -#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) -# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) -#else -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_BUILTIN) -# undef HEDLEY_CLANG_HAS_BUILTIN -#endif -#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) - -#if defined(HEDLEY_CLANG_HAS_FEATURE) -# undef HEDLEY_CLANG_HAS_FEATURE -#endif -#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) - -#if defined(HEDLEY_CLANG_HAS_EXTENSION) -# undef HEDLEY_CLANG_HAS_EXTENSION -#endif -#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) - -#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_WARNING) -# undef HEDLEY_CLANG_HAS_WARNING -#endif -#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ -/* :: End simde/hedley.h :: */ - -#define SIMDE_VERSION_MAJOR 0 -#define SIMDE_VERSION_MINOR 8 -#define SIMDE_VERSION_MICRO 0 -#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) -// Also update meson.build in the root directory of the repository - -#include -#include - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin 
simde/simde-detect-clang.h :: */ -/* Detect Clang Version - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -/* This file was originally part of SIMDe - * (). You're free to do with it as - * you please, but I do have a few small requests: - * - * * If you make improvements, please submit them back to SIMDe - * (at ) so others can - * benefit from them. - * * Please keep a link to SIMDe intact so people know where to submit - * improvements. - * * If you expose it publicly, please change the SIMDE_ prefix to - * something specific to your project. - * - * The version numbers clang exposes (in the ___clang_major__, - * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. - * Vendors such as Apple will define these values to their version - * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but - * __clang_major__ and __clang_minor__ are defined to 4 and 0 - * respectively, instead of 3 and 1. - * - * The solution is *usually* to use clang's feature detection macros - * () - * to determine if the feature you're interested in is available. This - * generally works well, and it should probably be the first thing you - * try. Unfortunately, it's not possible to check for everything. In - * particular, compiler bugs. - * - * This file just uses the feature checking macros to detect features - * added in specific versions of clang to identify which version of - * clang the compiler is based on. - * - * Right now it only goes back to 3.6, but I'm happy to accept patches - * to go back further. And, of course, newer versions are welcome if - * they're not already present, and if you find a way to detect a point - * release that would be great, too! - */ - -#if !defined(SIMDE_DETECT_CLANG_H) -#define SIMDE_DETECT_CLANG_H 1 - -/* Attempt to detect the upstream clang version number. I usually only - * worry about major version numbers (at least for 4.0+), but if you - * need more resolution I'm happy to accept patches that are able to - * detect minor versions as well. That said, you'll probably have a - * hard time with detection since AFAIK most minor releases don't add - * anything we can detect. Updated based on - * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 - * - would welcome patches/updates there as well. 
- */ - -#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -# if __has_attribute(unsafe_buffer_usage) // no new warnings in 17.0 -# define SIMDE_DETECT_CLANG_VERSION 170000 -# elif __has_attribute(nouwtable) // no new warnings in 16.0 -# define SIMDE_DETECT_CLANG_VERSION 160000 -# elif __has_warning("-Warray-parameter") -# define SIMDE_DETECT_CLANG_VERSION 150000 -# elif __has_warning("-Wbitwise-instead-of-logical") -# define SIMDE_DETECT_CLANG_VERSION 140000 -# elif __has_warning("-Waix-compat") -# define SIMDE_DETECT_CLANG_VERSION 130000 -# elif __has_warning("-Wformat-insufficient-args") -# define SIMDE_DETECT_CLANG_VERSION 120000 -# elif __has_warning("-Wimplicit-const-int-float-conversion") -# define SIMDE_DETECT_CLANG_VERSION 110000 -# elif __has_warning("-Wmisleading-indentation") -# define SIMDE_DETECT_CLANG_VERSION 100000 -# elif defined(__FILE_NAME__) -# define SIMDE_DETECT_CLANG_VERSION 90000 -# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) -# define SIMDE_DETECT_CLANG_VERSION 80000 -// For reasons unknown, Xcode 10.3 (Apple LLVM version 10.0.1) is apparently -// based on Clang 7, but does not support the warning we test. -// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and -// https://trac.macports.org/wiki/XcodeVersionInfo. -# elif __has_warning("-Wc++98-compat-extra-semi") || \ - (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) -# define SIMDE_DETECT_CLANG_VERSION 70000 -# elif __has_warning("-Wpragma-pack") -# define SIMDE_DETECT_CLANG_VERSION 60000 -# elif __has_warning("-Wbitfield-enum-conversion") -# define SIMDE_DETECT_CLANG_VERSION 50000 -# elif __has_attribute(diagnose_if) -# define SIMDE_DETECT_CLANG_VERSION 40000 -# elif __has_warning("-Wcomma") -# define SIMDE_DETECT_CLANG_VERSION 39000 -# elif __has_warning("-Wdouble-promotion") -# define SIMDE_DETECT_CLANG_VERSION 38000 -# elif __has_warning("-Wshift-negative-value") -# define SIMDE_DETECT_CLANG_VERSION 37000 -# elif __has_warning("-Wambiguous-ellipsis") -# define SIMDE_DETECT_CLANG_VERSION 36000 -# else -# define SIMDE_DETECT_CLANG_VERSION 1 -# endif -#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ - -/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty - * straightforward; it returns true if the compiler is a derivative - * of clang >= the specified version. - * - * Since this file is often (primarily?) useful for working around bugs - * it is also helpful to have a macro which returns true if only if the - * compiler is a version of clang *older* than the specified version to - * make it a bit easier to ifdef regions to add code for older versions, - * such as pragmas to disable a specific warning. 
*/ - -#if defined(SIMDE_DETECT_CLANG_VERSION) -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) -#else -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) -#endif - -#endif /* !defined(SIMDE_DETECT_CLANG_H) */ -/* :: End simde/simde-detect-clang.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-arch.h :: */ -/* Architecture detection - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - * Different compilers define different preprocessor macros for the - * same architecture. This is an attempt to provide a single - * interface which is usable on any compiler. - * - * In general, a macro named SIMDE_ARCH_* is defined for each - * architecture the CPU supports. When there are multiple possible - * versions, we try to define the macro to the target version. For - * example, if you want to check for i586+, you could do something - * like: - * - * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) - * ... - * #endif - * - * You could also just check that SIMDE_ARCH_X86 >= 5 without checking - * if it's defined first, but some compilers may emit a warning about - * an undefined macro being used (e.g., GCC with -Wundef). - * - * This was originally created for SIMDe - * (hence the prefix), but this - * header has no dependencies and may be used anywhere. It is - * originally based on information from - * , though it - * has been enhanced with additional information. - * - * If you improve this file, or find a bug, please file the issue at - * . If you copy this into - * your project, even if you change the prefix, please keep the links - * to SIMDe intact so others know where to report issues, submit - * enhancements, and find the latest version. 
*/ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -# if defined(__alpha_ev6__) -# define SIMDE_ARCH_ALPHA 6 -# elif defined(__alpha_ev5__) -# define SIMDE_ARCH_ALPHA 5 -# elif defined(__alpha_ev4__) -# define SIMDE_ARCH_ALPHA 4 -# else -# define SIMDE_ARCH_ALPHA 1 -# endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -# define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -# define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -# if !defined(_M_ARM64EC) -# define SIMDE_ARCH_AMD64 1000 -# endif -#endif - -/* ARM - */ -#if defined(__ARM_ARCH) -# if __ARM_ARCH > 100 -# define SIMDE_ARCH_ARM (__ARM_ARCH) -# else -# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) -# endif -#elif defined(_M_ARM) -# if _M_ARM > 100 -# define SIMDE_ARCH_ARM (_M_ARM) -# else -# define SIMDE_ARCH_ARM (_M_ARM * 100) -# endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_ARM 800 -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -# define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) -#else -# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -# define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -# elif defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -# endif -#endif -#if defined(__ARM_FEATURE_SVE) -# define SIMDE_ARCH_ARM_SVE -#endif -#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA -# define SIMDE_ARCH_ARM_FMA -#endif -#if defined(__ARM_FEATURE_CRYPTO) -# define SIMDE_ARCH_ARM_CRYPTO -#endif -#if defined(__ARM_FEATURE_QRDMX) -# define SIMDE_ARCH_ARM_QRDMX -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -# define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -# define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) -# define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -# define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -# define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -# define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -# define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -# define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -# define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -# define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -# define 
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
[Deleted vendored SIMDe sources, shown here only in part and heavily flattened by extraction: the remainder of simde/simde-diagnostic.h (the compiler- and version-specific SIMDE_DIAGNOSTIC_DISABLE_* warning-suppression macros and the aggregate SIMDE_DISABLE_UNWANTED_DIAGNOSTICS definition), all of simde/simde-features.h (x86 SSE/AVX/AVX-512, ARM NEON/SVE, WASM SIMD128, POWER AltiVec, z/Arch z-vector, MIPS MSA and LoongArch feature detection, the SIMDE_NATURAL_VECTOR_SIZE selection logic, and the SIMDE_*_ENABLE_NATIVE_ALIASES switches), and the opening of simde/simde-math.h (MIT license header, optional SLEEF integration, math.h/cmath detection, the SIMDE_MATH_INFINITY/NAN/PI and FLT/DBL limit constants, the fpclassify/fpclass helpers, and the first of the portable simde_math_* libm wrappers, abs through ceilf).]
- #define simde_math_ceilf(v) ceilf(v) - #endif -#endif - -#if !defined(simde_math_copysign) - #if SIMDE_MATH_BUILTIN_LIBM(copysign) - #define simde_math_copysign(x, y) __builtin_copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysign(x, y) std::copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysign(x, y) copysign(x, y) - #endif -#endif - -#if !defined(simde_math_copysignf) - #if SIMDE_MATH_BUILTIN_LIBM(copysignf) - #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysignf(x, y) std::copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysignf(x, y) copysignf(x, y) - #endif -#endif - -#if !defined(simde_math_signbit) - #if SIMDE_MATH_BUILTIN_LIBM(signbit) - #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - #define simde_math_signbit(x) __builtin_signbit(x) - #else - #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) - #endif - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_signbit(x) std::signbit(x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_signbit(x) signbit(x) - #endif -#endif - -#if !defined(simde_math_cos) - #if SIMDE_MATH_BUILTIN_LIBM(cos) - #define simde_math_cos(v) __builtin_cos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cos(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cos(v) cos(v) - #endif -#endif - -#if !defined(simde_math_cosf) - #if defined(SIMDE_MATH_SLEEF_ENABLE) - #if SIMDE_ACCURACY_PREFERENCE < 1 - #define simde_math_cosf(v) Sleef_cosf_u35(v) - #else - #define simde_math_cosf(v) Sleef_cosf_u10(v) - #endif - #elif SIMDE_MATH_BUILTIN_LIBM(cosf) - #define simde_math_cosf(v) __builtin_cosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosf(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosf(v) cosf(v) - #endif -#endif - -#if !defined(simde_math_cosh) - #if SIMDE_MATH_BUILTIN_LIBM(cosh) - #define simde_math_cosh(v) __builtin_cosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosh(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosh(v) cosh(v) - #endif -#endif - -#if !defined(simde_math_coshf) - #if SIMDE_MATH_BUILTIN_LIBM(coshf) - #define simde_math_coshf(v) __builtin_coshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_coshf(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_coshf(v) coshf(v) - #endif -#endif - -#if !defined(simde_math_erf) - #if SIMDE_MATH_BUILTIN_LIBM(erf) - #define simde_math_erf(v) __builtin_erf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erf(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erf(v) erf(v) - #endif -#endif - -#if !defined(simde_math_erff) - #if SIMDE_MATH_BUILTIN_LIBM(erff) - #define simde_math_erff(v) __builtin_erff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erff(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erff(v) erff(v) - #endif -#endif - -#if !defined(simde_math_erfc) - #if SIMDE_MATH_BUILTIN_LIBM(erfc) - #define simde_math_erfc(v) __builtin_erfc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfc(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfc(v) erfc(v) - #endif -#endif - -#if !defined(simde_math_erfcf) - #if SIMDE_MATH_BUILTIN_LIBM(erfcf) - #define simde_math_erfcf(v) 
__builtin_erfcf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfcf(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfcf(v) erfcf(v) - #endif -#endif - -#if !defined(simde_math_exp) - #if SIMDE_MATH_BUILTIN_LIBM(exp) - #define simde_math_exp(v) __builtin_exp(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp(v) exp(v) - #endif -#endif - -#if !defined(simde_math_expf) - #if SIMDE_MATH_BUILTIN_LIBM(expf) - #define simde_math_expf(v) __builtin_expf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expf(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expf(v) expf(v) - #endif -#endif - -#if !defined(simde_math_expm1) - #if SIMDE_MATH_BUILTIN_LIBM(expm1) - #define simde_math_expm1(v) __builtin_expm1(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1(v) expm1(v) - #endif -#endif - -#if !defined(simde_math_expm1f) - #if SIMDE_MATH_BUILTIN_LIBM(expm1f) - #define simde_math_expm1f(v) __builtin_expm1f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1f(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1f(v) expm1f(v) - #endif -#endif - -#if !defined(simde_math_exp2) - #if SIMDE_MATH_BUILTIN_LIBM(exp2) - #define simde_math_exp2(v) __builtin_exp2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2(v) exp2(v) - #endif -#endif - -#if !defined(simde_math_exp2f) - #if SIMDE_MATH_BUILTIN_LIBM(exp2f) - #define simde_math_exp2f(v) __builtin_exp2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2f(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2f(v) exp2f(v) - #endif -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10(v) __builtin_exp10(v) -#else -# define simde_math_exp10(v) pow(10.0, (v)) -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10f(v) __builtin_exp10f(v) -#else -# define simde_math_exp10f(v) powf(10.0f, (v)) -#endif - -#if !defined(simde_math_fabs) - #if SIMDE_MATH_BUILTIN_LIBM(fabs) - #define simde_math_fabs(v) __builtin_fabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabs(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabs(v) fabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_floor) - #if SIMDE_MATH_BUILTIN_LIBM(floor) - #define simde_math_floor(v) __builtin_floor(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floor(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floor(v) floor(v) - #endif -#endif - -#if !defined(simde_math_floorf) - #if SIMDE_MATH_BUILTIN_LIBM(floorf) - #define simde_math_floorf(v) __builtin_floorf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floorf(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floorf(v) floorf(v) - #endif -#endif - -#if 
!defined(simde_math_fma) - #if SIMDE_MATH_BUILTIN_LIBM(fma) - #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fma(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fma(x, y, z) fma(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmaf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaf) - #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaf(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaf(x, y, z) fmaf(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmax) - #if SIMDE_MATH_BUILTIN_LIBM(fmax) - #define simde_math_fmax(x, y) __builtin_fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmax(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmax(x, y) fmax(x, y) - #endif -#endif - -#if !defined(simde_math_fmaxf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) - #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaxf(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaxf(x, y) fmaxf(x, y) - #endif -#endif - -#if !defined(simde_math_hypot) - #if SIMDE_MATH_BUILTIN_LIBM(hypot) - #define simde_math_hypot(y, x) __builtin_hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypot(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypot(y, x) hypot(y, x) - #endif -#endif - -#if !defined(simde_math_hypotf) - #if SIMDE_MATH_BUILTIN_LIBM(hypotf) - #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypotf(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypotf(y, x) hypotf(y, x) - #endif -#endif - -#if !defined(simde_math_log) - #if SIMDE_MATH_BUILTIN_LIBM(log) - #define simde_math_log(v) __builtin_log(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log(v) log(v) - #endif -#endif - -#if !defined(simde_math_logf) - #if SIMDE_MATH_BUILTIN_LIBM(logf) - #define simde_math_logf(v) __builtin_logf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logf(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logf(v) logf(v) - #endif -#endif - -#if !defined(simde_math_logb) - #if SIMDE_MATH_BUILTIN_LIBM(logb) - #define simde_math_logb(v) __builtin_logb(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logb(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logb(v) logb(v) - #endif -#endif - -#if !defined(simde_math_logbf) - #if SIMDE_MATH_BUILTIN_LIBM(logbf) - #define simde_math_logbf(v) __builtin_logbf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logbf(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logbf(v) logbf(v) - #endif -#endif - -#if !defined(simde_math_log1p) - #if SIMDE_MATH_BUILTIN_LIBM(log1p) - #define simde_math_log1p(v) __builtin_log1p(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log1p(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1p(v) log1p(v) - #endif -#endif - -#if !defined(simde_math_log1pf) - #if SIMDE_MATH_BUILTIN_LIBM(log1pf) - #define simde_math_log1pf(v) __builtin_log1pf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define 
simde_math_log1pf(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1pf(v) log1pf(v) - #endif -#endif - -#if !defined(simde_math_log2) - #if SIMDE_MATH_BUILTIN_LIBM(log2) - #define simde_math_log2(v) __builtin_log2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2(v) log2(v) - #endif -#endif - -#if !defined(simde_math_log2f) - #if SIMDE_MATH_BUILTIN_LIBM(log2f) - #define simde_math_log2f(v) __builtin_log2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2f(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2f(v) log2f(v) - #endif -#endif - -#if !defined(simde_math_log10) - #if SIMDE_MATH_BUILTIN_LIBM(log10) - #define simde_math_log10(v) __builtin_log10(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10(v) log10(v) - #endif -#endif - -#if !defined(simde_math_log10f) - #if SIMDE_MATH_BUILTIN_LIBM(log10f) - #define simde_math_log10f(v) __builtin_log10f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10f(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10f(v) log10f(v) - #endif -#endif - -#if !defined(simde_math_modf) - #if SIMDE_MATH_BUILTIN_LIBM(modf) - #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modf(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modf(x, iptr) modf(x, iptr) - #endif -#endif - -#if !defined(simde_math_modff) - #if SIMDE_MATH_BUILTIN_LIBM(modff) - #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modff(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modff(x, iptr) modff(x, iptr) - #endif -#endif - -#if !defined(simde_math_nearbyint) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) - #define simde_math_nearbyint(v) __builtin_nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyint(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyint(v) nearbyint(v) - #endif -#endif - -#if !defined(simde_math_nearbyintf) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) - #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyintf(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyintf(v) nearbyintf(v) - #endif -#endif - -#if !defined(simde_math_pow) - #if SIMDE_MATH_BUILTIN_LIBM(pow) - #define simde_math_pow(y, x) __builtin_pow(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_pow(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_pow(y, x) pow(y, x) - #endif -#endif - -#if !defined(simde_math_powf) - #if SIMDE_MATH_BUILTIN_LIBM(powf) - #define simde_math_powf(y, x) __builtin_powf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_powf(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_powf(y, x) powf(y, x) - #endif -#endif - -#if !defined(simde_math_rint) - #if SIMDE_MATH_BUILTIN_LIBM(rint) - #define simde_math_rint(v) __builtin_rint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rint(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rint(v) rint(v) - #endif 
-#endif - -#if !defined(simde_math_rintf) - #if SIMDE_MATH_BUILTIN_LIBM(rintf) - #define simde_math_rintf(v) __builtin_rintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rintf(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rintf(v) rintf(v) - #endif -#endif - -#if !defined(simde_math_round) - #if SIMDE_MATH_BUILTIN_LIBM(round) - #define simde_math_round(v) __builtin_round(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_round(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_round(v) round(v) - #endif -#endif - -#if !defined(simde_math_roundf) - #if SIMDE_MATH_BUILTIN_LIBM(roundf) - #define simde_math_roundf(v) __builtin_roundf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_roundf(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_roundf(v) roundf(v) - #endif -#endif - -#if !defined(simde_math_roundeven) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundeven(v) __builtin_roundeven(v) - #elif defined(simde_math_round) && defined(simde_math_fabs) - static HEDLEY_INLINE - double - simde_math_roundeven(double v) { - double rounded = simde_math_round(v); - double diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundeven simde_math_roundeven - #endif -#endif - -#if !defined(simde_math_roundevenf) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundevenf(v) __builtin_roundevenf(v) - #elif defined(simde_math_roundf) && defined(simde_math_fabsf) - static HEDLEY_INLINE - float - simde_math_roundevenf(float v) { - float rounded = simde_math_roundf(v); - float diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundevenf simde_math_roundevenf - #endif -#endif - -#if !defined(simde_math_sin) - #if SIMDE_MATH_BUILTIN_LIBM(sin) - #define simde_math_sin(v) __builtin_sin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sin(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sin(v) sin(v) - #endif -#endif - -#if !defined(simde_math_sinf) - #if SIMDE_MATH_BUILTIN_LIBM(sinf) - #define simde_math_sinf(v) __builtin_sinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinf(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinf(v) sinf(v) - #endif -#endif - -#if !defined(simde_math_sinh) - #if SIMDE_MATH_BUILTIN_LIBM(sinh) - #define simde_math_sinh(v) __builtin_sinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinh(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinh(v) sinh(v) - #endif -#endif - -#if !defined(simde_math_sinhf) - #if SIMDE_MATH_BUILTIN_LIBM(sinhf) - #define simde_math_sinhf(v) __builtin_sinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinhf(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinhf(v) sinhf(v) - #endif -#endif - -#if !defined(simde_math_sqrt) - #if SIMDE_MATH_BUILTIN_LIBM(sqrt) - #define simde_math_sqrt(v) __builtin_sqrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrt(v) 
std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrt(v) sqrt(v) - #endif -#endif - -#if !defined(simde_math_sqrtf) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) - #define simde_math_sqrtf(v) __builtin_sqrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtf(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtf(v) sqrtf(v) - #endif -#endif - -#if !defined(simde_math_sqrtl) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtl) - #define simde_math_sqrtl(v) __builtin_sqrtl(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtl(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtl(v) sqrtl(v) - #endif -#endif - -#if !defined(simde_math_tan) - #if SIMDE_MATH_BUILTIN_LIBM(tan) - #define simde_math_tan(v) __builtin_tan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tan(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tan(v) tan(v) - #endif -#endif - -#if !defined(simde_math_tanf) - #if SIMDE_MATH_BUILTIN_LIBM(tanf) - #define simde_math_tanf(v) __builtin_tanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanf(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanf(v) tanf(v) - #endif -#endif - -#if !defined(simde_math_tanh) - #if SIMDE_MATH_BUILTIN_LIBM(tanh) - #define simde_math_tanh(v) __builtin_tanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanh(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanh(v) tanh(v) - #endif -#endif - -#if !defined(simde_math_tanhf) - #if SIMDE_MATH_BUILTIN_LIBM(tanhf) - #define simde_math_tanhf(v) __builtin_tanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanhf(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanhf(v) tanhf(v) - #endif -#endif - -#if !defined(simde_math_trunc) - #if SIMDE_MATH_BUILTIN_LIBM(trunc) - #define simde_math_trunc(v) __builtin_trunc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_trunc(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_trunc(v) trunc(v) - #endif -#endif - -#if !defined(simde_math_truncf) - #if SIMDE_MATH_BUILTIN_LIBM(truncf) - #define simde_math_truncf(v) __builtin_truncf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_truncf(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_truncf(v) truncf(v) - #endif -#endif - -/*** Comparison macros (which don't raise invalid errors) ***/ - -#if defined(isunordered) - #define simde_math_isunordered(x, y) isunordered(x, y) -#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) - #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) -#else - static HEDLEY_INLINE - int simde_math_isunordered(double x, double y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunordered simde_math_isunordered - - static HEDLEY_INLINE - int simde_math_isunorderedf(float x, float y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunorderedf simde_math_isunorderedf -#endif -#if !defined(simde_math_isunorderedf) - #define simde_math_isunorderedf simde_math_isunordered -#endif - -/*** Additional functions not in libm ***/ - -#if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) - static HEDLEY_INLINE - double - simde_math_cdfnorm(double x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const double a1 = 0.254829592; - static const double a2 = -0.284496736; 
- static const double a3 = 1.421413741; - static const double a4 = -1.453152027; - static const double a5 = 1.061405429; - static const double p = 0.3275911; - - const int sign = x < 0; - x = simde_math_fabs(x) / simde_math_sqrt(2.0); - - /* A&S formula 7.1.26 */ - double t = 1.0 / (1.0 + p * x); - double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); - - return 0.5 * (1.0 + (sign ? -y : y)); - } - #define simde_math_cdfnorm simde_math_cdfnorm -#endif - -#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) - static HEDLEY_INLINE - float - simde_math_cdfnormf(float x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const float a1 = 0.254829592f; - static const float a2 = -0.284496736f; - static const float a3 = 1.421413741f; - static const float a4 = -1.453152027f; - static const float a5 = 1.061405429f; - static const float p = 0.3275911f; - - const int sign = x < 0; - x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); - - /* A&S formula 7.1.26 */ - float t = 1.0f / (1.0f + p * x); - float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); - - return 0.5f * (1.0f + (sign ? -y : y)); - } - #define simde_math_cdfnormf simde_math_cdfnormf -#endif - -#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) - /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ - static HEDLEY_INLINE - double - simde_math_cdfnorminv(double p) { - static const double a[6] = { - -3.969683028665376e+01, - 2.209460984245205e+02, - -2.759285104469687e+02, - 1.383577518672690e+02, - -3.066479806614716e+01, - 2.506628277459239e+00 - }; - - static const double b[5] = { - -5.447609879822406e+01, - 1.615858368580409e+02, - -1.556989798598866e+02, - 6.680131188771972e+01, - -1.328068155288572e+01 - }; - - static const double c[6] = { - -7.784894002430293e-03, - -3.223964580411365e-01, - -2.400758277161838e+00, - -2.549732539343734e+00, - 4.374664141464968e+00, - 2.938163982698783e+00 - }; - - static const double d[4] = { - 7.784695709041462e-03, - 3.224671290700398e-01, - 2.445134137142996e+00, - 3.754408661907416e+00 - }; - - static const double low = 0.02425; - static const double high = 0.97575; - double q, r; - - if (p < 0 || p > 1) { - return 0.0; - } else if (p == 0) { - return -SIMDE_MATH_INFINITY; - } else if (p == 1) { - return SIMDE_MATH_INFINITY; - } else if (p < low) { - q = simde_math_sqrt(-2.0 * simde_math_log(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } -} -#define simde_math_cdfnorminv simde_math_cdfnorminv -#endif - -#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_cdfnorminvf(float p) { - static const float a[6] = { - -3.969683028665376e+01f, - 2.209460984245205e+02f, - -2.759285104469687e+02f, - 1.383577518672690e+02f, - -3.066479806614716e+01f, - 
2.506628277459239e+00f - }; - static const float b[5] = { - -5.447609879822406e+01f, - 1.615858368580409e+02f, - -1.556989798598866e+02f, - 6.680131188771972e+01f, - -1.328068155288572e+01f - }; - static const float c[6] = { - -7.784894002430293e-03f, - -3.223964580411365e-01f, - -2.400758277161838e+00f, - -2.549732539343734e+00f, - 4.374664141464968e+00f, - 2.938163982698783e+00f - }; - static const float d[4] = { - 7.784695709041462e-03f, - 3.224671290700398e-01f, - 2.445134137142996e+00f, - 3.754408661907416e+00f - }; - static const float low = 0.02425f; - static const float high = 0.97575f; - float q, r; - - if (p < 0 || p > 1) { - return 0.0f; - } else if (p == 0) { - return -SIMDE_MATH_INFINITYF; - } else if (p == 1) { - return SIMDE_MATH_INFINITYF; - } else if (p < low) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5f; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } - } - #define simde_math_cdfnorminvf simde_math_cdfnorminvf -#endif - -#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfinv(double x) { - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c - * - * The original answer on SO uses a constant of 0.147, but in my - * testing 0.14829094707965850830078125 gives a lower average absolute error - * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). - * That said, if your goal is to minimize the *maximum* absolute - * error, 0.15449436008930206298828125 provides significantly better - * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); - } - #define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfinvf(float x) { - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); - } - #define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfcinv(double x) { - if(x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - static const double p[6] = { - 0.1550470003116, - 1.382719649631, - 0.690969348887, - -1.128081391617, - 0.680544246825, - -0.16444156791 - }; - static const double q[3] = { - 0.155024849822, - 1.385228141995, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - static const double p[4] = { - 0.00980456202915, - 0.363667889171, - 0.97302949837, - -0.5374947401 - }; - static const double q[3] = { - 0.00980451277802, - 0.363699971544, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } - } - - #define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfcinvf(float x) { - if(x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = { - 0.1550470003116f, - 1.382719649631f, - 0.690969348887f, - -1.128081391617f, - 0.680544246825f - -0.164441567910f - }; - static const float q[3] = { - 0.155024849822f, - 1.385228141995f, - 1.000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = { - 0.00980456202915f, - 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f - }; - static const float q[3] = { - 0.00980451277802f, - 0.36369997154400f, - 1.00000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; - } - } - - #define simde_math_erfcinvf simde_math_erfcinvf -#endif - -static HEDLEY_INLINE -double -simde_math_rad2deg(double radians) { - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE -float -simde_math_rad2degf(float radians) { - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE -double -simde_math_deg2rad(double degrees) { - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE -float -simde_math_deg2radf(float degrees) { - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE -int8_t -simde_math_adds_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); - #else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_adds_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_adds_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_adds_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_adds_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); - #else - uint8_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_adds_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); - #else - uint16_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_adds_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); - #else - uint32_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_adds_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); - #else - uint64_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -int8_t -simde_math_subs_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); - #else - uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - case 32: result = func_name(__VA_ARGS__, 32); break; \ - case 33: result = func_name(__VA_ARGS__, 33); break; \ - case 34: result = func_name(__VA_ARGS__, 34); break; \ - case 35: result = func_name(__VA_ARGS__, 35); break; \ - case 36: result = func_name(__VA_ARGS__, 36); break; \ - case 37: result = func_name(__VA_ARGS__, 37); break; \ - case 38: result = func_name(__VA_ARGS__, 38); break; \ - case 39: result = func_name(__VA_ARGS__, 39); break; \ - case 40: result = func_name(__VA_ARGS__, 40); break; \ - case 41: result = func_name(__VA_ARGS__, 41); break; \ - case 42: result = func_name(__VA_ARGS__, 42); break; \ - case 43: result = func_name(__VA_ARGS__, 43); break; \ - case 44: result = func_name(__VA_ARGS__, 44); break; \ - case 45: result = func_name(__VA_ARGS__, 45); break; \ - case 46: result = func_name(__VA_ARGS__, 46); break; \ - case 47: result = func_name(__VA_ARGS__, 47); break; \ - case 48: result = func_name(__VA_ARGS__, 48); break; \ - case 49: result = func_name(__VA_ARGS__, 49); break; \ - case 50: result = func_name(__VA_ARGS__, 50); break; \ - case 51: result = func_name(__VA_ARGS__, 51); break; \ - case 52: result = func_name(__VA_ARGS__, 52); break; \ - case 53: result = func_name(__VA_ARGS__, 53); break; \ - case 54: result = func_name(__VA_ARGS__, 54); break; \ - case 55: result = func_name(__VA_ARGS__, 55); break; \ - case 56: result = func_name(__VA_ARGS__, 56); break; \ - case 57: result = func_name(__VA_ARGS__, 57); break; \ - case 58: result = func_name(__VA_ARGS__, 58); break; \ - case 59: result = func_name(__VA_ARGS__, 59); break; \ - case 60: result = func_name(__VA_ARGS__, 60); break; \ - case 61: result = func_name(__VA_ARGS__, 61); break; \ - case 62: 
result = func_name(__VA_ARGS__, 62); break; \ - case 63: result = func_name(__VA_ARGS__, 63); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
[Deleted vendored SIMDe headers (removal-only hunks): the SIMDE_CONSTIFY_* immediate-dispatch switch macros (simde/simde-constify.h); the alignment utilities SIMDE_ALIGN_OF, SIMDE_ALIGN_TO, SIMDE_ALIGN_ASSUME_TO, SIMDE_ALIGN_LIKE, and SIMDE_ALIGN_CAST (simde/simde-align.h); the shared SIMDe configuration layer, including the SIMDE_FAST_* math trade-off options, GCC vector-extension and OpenMP/Cilk Plus vectorization macros, endianness detection and simde_bswap64, the simde_float32/simde_float64 and simde_poly* typedefs, the simde_memcpy/simde_memset/simde_memcmp fallbacks, and the NaN-quieting helpers; the simde_assert_* macros (simde/check.h); the simde_trap and simde_dbg_assert helpers (simde/debug-trap.h); and the fixed-width __builtin_* suffix mapping together with the SIMDE_BUG_* known-compiler-bug workaround detection. These headers are deleted wholesale; no lines within them are otherwise modified.]
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. 
- * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - #include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; - #endif - - #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; - #endif - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; - #endif - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; - #endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde__m64_from_private(simde__m64_private v) { - simde__m64 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64_private -simde__m64_to_private(simde__m64 v) { - simde__m64_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ - SIMDE_FUNCTION_ATTRIBUTES \ - simde__##simde_type \ - simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ - simde__##simde_type##_private r_; \ - r_.isax##_##fragment = value; \ - return simde__##simde_type##_from_private(r_); \ - } \ - \ - SIMDE_FUNCTION_ATTRIBUTES \ - source_type \ - simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ - simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ - return r_.isax##_##fragment; \ - } - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) -#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) -# define _m_paddb(a, b) simde_m_paddb(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_add_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) -# define _m_paddw(a, b) simde_mm_add_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) -# define _m_paddd(a, b) simde_mm_add_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { - r_.i8[i] = INT8_MAX; - } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { - r_.i8[i] = INT8_MIN; - } else { - r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) -# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, 
b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) -# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_and_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - r_.i64[0] = a_.i64[0] & b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -# define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = 
simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) -# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) -# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) -# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) -# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) -# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) -# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtm64_si64 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtm64_si64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i64[0]; - #endif - #endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -# define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi32_si64 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[2] = { a, 0 }; - r_.neon_i32 = vld1_s32(av); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -# define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi64_m64 (int64_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtsi64_m64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); - #else - r_.i64[0] = a; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi64_si32 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_empty (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); - #else - /* noop */ - #endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_empty() simde_mm_empty() -# define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_or_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - r_.i64[0] = a_.i64[0] | b_.i64[0]; 
- #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -# define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to 
UINT8_MAX */ - const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); - - /* Elements which are within the acceptable range */ - const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); - const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); - - /* Final values as 16-bit integers */ - const int16x8_t values = vorrq_s16(le_max, gt_max); - - r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else if (a_.i16[i] < 0) { - r_.u8[i] = 0; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] > UINT8_MAX) { - r_.u8[i + 4] = UINT8_MAX; - } else if (b_.i16[i] < 0) { - r_.u8[i + 4] = 0; - } else { - r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) -# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i8 = vld1_s8(v); - #else - r_.i8[0] = e0; - r_.i8[1] = e1; - r_.i8[2] = e2; - r_.i8[3] = e3; - r_.i8[4] = e4; - r_.i8[5] = e5; - r_.i8[6] = e6; - r_.i8[7] = e7; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m64_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi8( - HEDLEY_STATIC_CAST(int8_t, e7), - HEDLEY_STATIC_CAST(int8_t, e6), - HEDLEY_STATIC_CAST(int8_t, e5), - HEDLEY_STATIC_CAST(int8_t, e4), - HEDLEY_STATIC_CAST(int8_t, e3), - HEDLEY_STATIC_CAST(int8_t, e2), - HEDLEY_STATIC_CAST(int8_t, e1), - HEDLEY_STATIC_CAST(int8_t, e0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u8 = vld1_u8(v); - #else - r_.u8[0] = e0; - r_.u8[1] = e1; - r_.u8[2] = e2; - r_.u8[3] = e3; - r_.u8[4] = e4; - r_.u8[5] = e5; - r_.u8[6] = e6; - r_.u8[7] = e7; - #endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi16(e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; - r_.neon_i16 = vld1_s16(v); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - #endif - - return 
simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi16( - HEDLEY_STATIC_CAST(int16_t, e3), - HEDLEY_STATIC_CAST(int16_t, e2), - HEDLEY_STATIC_CAST(int16_t, e1), - HEDLEY_STATIC_CAST(int16_t, e0) - ); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; - r_.neon_u16 = vld1_u16(v); -#else - r_.u16[0] = e0; - r_.u16[1] = e1; - r_.u16[2] = e2; - r_.u16[3] = e3; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32( - HEDLEY_STATIC_CAST(int32_t, e1), - HEDLEY_STATIC_CAST(int32_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; - r_.neon_u32 = vld1_u32(v); -#else - r_.u32[0] = e0; - r_.u32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi32 (int32_t e1, int32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32(e1, e0); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; - r_.neon_i32 = vld1_s32(v); -#else - r_.i32[0] = e0; - r_.i32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pi64 (int64_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; - r_.neon_i64 = vld1_s64(v); -#else - r_.i64[0] = e0; -#endif - - return simde__m64_from_private(r_); -} - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; - r_.neon_f32 = vld1_f32(v); -#else - r_.f32[0] = e0; - r_.f32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi8 (int8_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi8(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i8 = vmov_n_s8(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi8(a, a, a, a, a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi16 (int16_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi16(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i16 = vmov_n_s16(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi16(a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi32 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi32(a); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i32 = vmov_n_s32(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi16(e3, e2, e1, e0); - #else - return simde_mm_set_pi16(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi32 (int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi32(e1, e0); - #else - return simde_mm_set_pi32(e0, e1); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setzero_si64 (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setzero_si64(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_u32 = vmov_n_u32(0); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(0, 0); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_si64() simde_mm_setzero_si64() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_load_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_loadu_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(mem_addr, &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_setone_si64 (void) { - return simde_mm_set1_pi32(~INT32_C(0)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) 
- return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) -# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) -# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psllh_s(a_.mmi_i16, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) -# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi32(a, count); - #else - simde__m64_private r_; - 
simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) -# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_slli_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); - #else - r_.u64[0] = a_.u64[0] << count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) -# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 << count_.i64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] << count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) -# define _m_psllq(a, count) simde_mm_sll_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) - return simde_mm_setzero_si64(); - - r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { - r_.u16[i] = a_.u16[i] >> count_.u64[0]; - } - 
#endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) -# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { - r_.u32[i] = a_.u32[i] >> count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) -# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) -# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) -# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_si64(a, count); 
- #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> count; - #else - r_.u64[0] = a_.u64[0] >> count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) -# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 >> count_.u64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] >> count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) -# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrah_s(a_.mmi_i16, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) -# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psraw_s(a_.mmi_i32, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) 
-# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) -# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int32_t cnt = (count_.u64[0] > 31) ? 31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) -# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) -# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); - #elif 
defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) -# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) -# define _m_psubd(a, b) simde_mm_sub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { - r_.i8[i] = INT8_MIN; - } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) -# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const int32_t x = a_.u8[i] - b_.u8[i]; - if (x < 0) { - r_.u8[i] = 0; - } else if (x > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) -# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { - r_.i16[i] = SHRT_MIN; - } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) -# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - const int x = a_.u16[i] - b_.u16[i]; - if (x < 0) { - r_.u16[i] = 0; - } else if (x > UINT16_MAX) { - r_.u16[i] = UINT16_MAX; - } else { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) -# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); - #else - r_.i8[0] = a_.i8[4]; - r_.i8[1] = b_.i8[4]; - r_.i8[2] = a_.i8[5]; - r_.i8[3] = b_.i8[5]; - r_.i8[4] = a_.i8[6]; - r_.i8[5] = b_.i8[6]; - r_.i8[6] = a_.i8[7]; - r_.i8[7] = b_.i8[7]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) -# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); - #else - r_.i16[0] = a_.i16[2]; - r_.i16[1] = b_.i16[2]; - r_.i16[2] = a_.i16[3]; - r_.i16[3] = b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) -# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[1]; - r_.i32[1] = b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) -# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); - #else - r_.i8[0] = a_.i8[0]; - r_.i8[1] = b_.i8[0]; - r_.i8[2] = a_.i8[1]; - r_.i8[3] = b_.i8[1]; - r_.i8[4] = a_.i8[2]; - r_.i8[5] = b_.i8[2]; - r_.i8[6] = a_.i8[3]; - r_.i8[7] = b_.i8[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) -# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = 
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); - #else - r_.i16[0] = a_.i16[0]; - r_.i16[1] = b_.i16[0]; - r_.i16[2] = a_.i16[1]; - r_.i16[3] = b_.i16[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) -# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); - #else - r_.i32[0] = a_.i32[0]; - r_.i32[1] = b_.i32[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) -# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_xor_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - r_.u64[0] = a_.u64[0] ^ b_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) -# define _m_pxor(a, b) simde_mm_xor_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_m_to_int (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _m_to_int(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _m_to_int(a) simde_m_to_int(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_MMX_H) */ -/* :: End simde/x86/mmx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-f16.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do 
so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if !defined(SIMDE_FLOAT16_H) -#define SIMDE_FLOAT16_H - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* Portable version which should work on pretty much any compiler. - * Obviously you can't rely on compiler support for things like - * conversion to/from 32-bit floats, so make sure you always use the - * functions and macros in this file! - * - * The portable implementations are (heavily) based on CC0 code by - * Fabian Giesen: (see also - * ). - * I have basically just modified it to get rid of some UB (lots of - * aliasing, right shifting a negative value), use fixed-width types, - * and work in C. */ -#define SIMDE_FLOAT16_API_PORTABLE 1 -/* _Float16, per C standard (TS 18661-3; - * ). */ -#define SIMDE_FLOAT16_API_FLOAT16 2 -/* clang >= 6.0 supports __fp16 as an interchange format on all - * targets, but only allows you to use them for arguments and return - * values on targets which have defined an ABI. We get around the - * restriction by wrapping the __fp16 in a struct, but we can't do - * that on Arm since it would break compatibility with the NEON F16 - * functions. */ -#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 -/* This is basically __fp16 as specified by Arm, where arugments and - * return values are raw __fp16 values not structs. */ -#define SIMDE_FLOAT16_API_FP16 4 - -/* Choosing an implementation. This is a bit rough, but I don't have - * any ideas on how to improve it. If you do, patches are definitely - * welcome. */ -#if !defined(SIMDE_FLOAT16_API) - #if defined(__ARM_FP16_FORMAT_IEEE) && (defined(SIMDE_ARM_NEON_FP16) || defined(__ARM_FP16_ARGS)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 - #elif !defined(__EMSCRIPTEN__) && !(defined(__clang__) && defined(SIMDE_ARCH_POWER)) && \ - !(defined(HEDLEY_MSVC_VERSION) && defined(__clang__)) && \ - !(defined(SIMDE_ARCH_MIPS) && defined(__clang__)) && \ - !(defined(__clang__) && defined(SIMDE_ARCH_RISCV64)) && ( \ - defined(SIMDE_X86_AVX512FP16_NATIVE) || \ - (defined(SIMDE_ARCH_X86_SSE2) && HEDLEY_GCC_VERSION_CHECK(12,0,0)) || \ - (defined(SIMDE_ARCH_AARCH64) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !defined(__cplusplus)) || \ - ((defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0)) || \ - (!(defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(6,0,0))) - /* We haven't found a better way to detect this. 
It seems like defining - * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then - * checking for defined(FLT16_MAX) should work, but both gcc and - * clang will define the constants even if _Float16 is not - * supported. Ideas welcome. */ - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 - #elif defined(__FLT16_MIN__) && \ - (defined(__clang__) && \ - (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) \ - && !defined(SIMDE_ARCH_RISCV64)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI - #else - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE - #endif -#endif - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 - typedef _Float16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #if !defined(__cplusplus) - #define SIMDE_FLOAT16_C(value) value##f16 - #else - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(_Float16, (value)) - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - typedef struct { __fp16 value; } simde_float16; - #if defined(SIMDE_STATEMENT_EXPR_) && !defined(SIMDE_TESTS_H) - #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) - #else - #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) - #define SIMDE_FLOAT16_IS_SCALAR 1 - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 - typedef __fp16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - typedef struct { uint16_t value; } simde_float16; -#else - #error No 16-bit floating point API. -#endif - -#if \ - defined(SIMDE_VECTOR_OPS) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) - #define SIMDE_FLOAT16_VECTOR -#endif - -/* Reinterpret -- you *generally* shouldn't need these, they're really - * intended for internal use. However, on x86 half-precision floats - * get stuffed into a __m128i/__m256i, so it may be useful. 
*/ - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - #define SIMDE_NANHF simde_uint16_as_float16(0x7E00) // a quiet Not-a-Number - #define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) - #define SIMDE_NINFINITYHF simde_uint16_as_float16(0xFC00) -#else - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF SIMDE_FLOAT16_C(__builtin_nanf16("")) - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_FLOAT16_C(SIMDE_MATH_NAN) - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(__builtin_inf16()) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-__builtin_inf16()) - #else - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-SIMDE_MATH_INFINITY) - #endif - #else - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF __builtin_nanf16("") - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_MATH_NAN - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF __builtin_inf16() - #define SIMDE_NINFINITYHF -(__builtin_inf16()) - #else - #define SIMDE_INFINITYHF HEDLEY_STATIC_CAST(simde_float16, SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF HEDLEY_STATIC_CAST(simde_float16, -SIMDE_MATH_INFINITY) - #endif - #endif -#endif - -/* Conversion -- convert between single-precision and half-precision - * floats. */ -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float16 -simde_float16_from_float32 (simde_float32 value) { - simde_float16 res; - - #if \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) - res = HEDLEY_STATIC_CAST(simde_float16, value); - #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) - res.value = HEDLEY_STATIC_CAST(__fp16, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint32_t f32u = simde_float32_as_uint32(value); - static const uint32_t f32u_infty = UINT32_C(255) << 23; - static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; - static const uint32_t denorm_magic = - ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; - uint16_t f16u; - - uint32_t sign = f32u & (UINT32_C(1) << 31); - f32u ^= sign; - - /* NOTE all the integer compares in this function cast the operands - * to signed values to help compilers vectorize to SSE2, which lacks - * unsigned comparison instructions. This is fine since all - * operands are below 0x80000000 (we clear the sign bit). */ - - if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ - f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ - } else { /* (De)normalized number or zero */ - if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ - /* use a magic value to align our 10 mantissa bits at the bottom of - * the float. as long as FP addition is round-to-nearest-even this - * just works. */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); - - /* and one integer subtract of the bias later, we have our final float! 
*/ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); - } else { - uint32_t mant_odd = (f32u >> 13) & 1; - - /* update exponent, rounding bias part 1 */ - f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); - /* rounding bias part 2 */ - f32u += mant_odd; - /* take the bits! */ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); - } - } - - f16u |= sign >> 16; - res = simde_uint16_as_float16(f16u); - #endif - - return res; -} - -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float32 -simde_float16_to_float32 (simde_float16 value) { - simde_float32 res; - - #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) - res = HEDLEY_STATIC_CAST(simde_float32, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint16_t half = simde_float16_as_uint16(value); - const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); - const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ - uint32_t f32u; - - f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ - uint32_t exp = shifted_exp & f32u; /* just the exponent */ - f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ - - /* handle exponent special cases */ - if (exp == shifted_exp) /* Inf/NaN? */ - f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ - else if (exp == 0) { /* Zero/Denormal? */ - f32u += (1) << 23; /* extra exp adjust */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ - } - - f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ - res = simde_uint32_as_float32(f32u); - #endif - - return res; -} - -#ifdef SIMDE_FLOAT16_C - #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) -#else - #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) -#endif - -#if !defined(simde_isinfhf) && defined(simde_math_isinff) - #define simde_isinfhf(a) simde_math_isinff(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnanhf) && defined(simde_math_isnanf) - #define simde_isnanhf(a) simde_math_isnanf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnormalhf) && defined(simde_math_isnormalf) - #define simde_isnormalhf(a) simde_math_isnormalf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_issubnormalhf) && defined(simde_math_issubnormalf) - #define simde_issubnormalhf(a) simde_math_issubnormalf(simde_float16_to_float32(a)) -#endif - -#define simde_fpclassifyhf(a) simde_math_fpclassifyf(simde_float16_to_float32(a)) - -static HEDLEY_INLINE -uint8_t -simde_fpclasshf(simde_float16 v, const int imm8) { - uint16_t bits = simde_float16_as_uint16(v); - uint8_t negative = (bits >> 15) & 1; - uint16_t const ExpMask = 0x7C00; // [14:10] - uint16_t const MantMask = 0x03FF; // [9:0] - uint8_t exponent_all_ones = ((bits & ExpMask) == ExpMask); - uint8_t exponent_all_zeros = ((bits & ExpMask) == 0); - uint8_t mantissa_all_zeros = ((bits & MantMask) == 0); - uint8_t zero = exponent_all_zeros & mantissa_all_zeros; - uint8_t signaling_bit = (bits >> 9) & 1; - - uint8_t result = 0; - uint8_t snan = exponent_all_ones & (!mantissa_all_zeros) & (!signaling_bit); - uint8_t qnan = exponent_all_ones & (!mantissa_all_zeros) & signaling_bit; - uint8_t positive_zero = (!negative) & zero; - uint8_t negative_zero = negative & zero; - uint8_t positive_infinity = (!negative) & exponent_all_ones & mantissa_all_zeros; - uint8_t negative_infinity = negative & exponent_all_ones & mantissa_all_zeros; - uint8_t 
denormal = exponent_all_zeros & (!mantissa_all_zeros); - uint8_t finite_negative = negative & (!exponent_all_ones) & (!zero); - result = (((imm8 >> 0) & qnan) | \ - ((imm8 >> 1) & positive_zero) | \ - ((imm8 >> 2) & negative_zero) | \ - ((imm8 >> 3) & positive_infinity) | \ - ((imm8 >> 4) & negative_infinity) | \ - ((imm8 >> 5) & denormal) | \ - ((imm8 >> 6) & finite_negative) | \ - ((imm8 >> 7) & snan)); - return result; -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_FLOAT16_H) */ -/* :: End simde/simde-f16.h :: */ - -#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) - #define NOMINMAX - #include -#endif - -#if defined(__ARM_ACLE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_ALIGN_TO_16 __m128 n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v16i8 lsx_i8; - v8i16 lsx_i16; - v4i32 lsx_i32; - v2i64 lsx_i64; - v16u8 lsx_u8; - v8u16 lsx_u16; - v4u32 lsx_u32; - v2u64 lsx_u64; - v4f32 lsx_f32; - v2f64 lsx_f64; - #endif -} simde__m128_private; - -#if defined(SIMDE_X86_SSE_NATIVE) - typedef __m128 simde__m128; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef float32x4_t simde__m128; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; -#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - typedef v4f32 simde__m128; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128_private simde__m128; -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - typedef simde__m128 __m128; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde__m128_from_private(simde__m128_private v) { - simde__m128 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128_private -simde__m128_to_private(simde__m128 v) { - simde__m128_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(float) - simde__m128_to_altivec_f32(simde__m128 value) { - simde__m128_private r_ = simde__m128_to_private(value); - return r_.altivec_f32; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { - simde__m128_private r_; - r_.altivec_f32 = value; - return simde__m128_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); -#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ - -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) -#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ - -enum { - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, - SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, - SIMDE_MM_ROUND_UP = _MM_ROUND_UP, - SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO - #else - SIMDE_MM_ROUND_NEAREST = 0x0000, - SIMDE_MM_ROUND_DOWN = 0x2000, - SIMDE_MM_ROUND_UP = 0x4000, - SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 - #endif -}; -#if defined(_MM_ROUND_MASK) -# define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK -#else -# define SIMDE_MM_ROUND_MASK (0x6000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK -#endif - -#if defined(_MM_FROUND_TO_NEAREST_INT) -# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT -# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF -# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF -# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO -# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION - -# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC -# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC -#else -# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 -# define 
SIMDE_MM_FROUND_TO_NEG_INF 0x01 -# define SIMDE_MM_FROUND_TO_POS_INF 0x02 -# define SIMDE_MM_FROUND_TO_ZERO 0x03 -# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 - -# define SIMDE_MM_FROUND_RAISE_EXC 0x00 -# define SIMDE_MM_FROUND_NO_EXC 0x08 -#endif - -#define SIMDE_MM_FROUND_NINT \ - (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_FLOOR \ - (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_CEIL \ - (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_TRUNC \ - (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_RINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_NEARBYINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) - -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) -# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT -# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF -# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF -# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO -# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION -# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC -# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT -# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR -# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL -# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC -# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT -# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT -#endif - -#if defined(_MM_EXCEPT_INVALID) -# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID -#else -# define SIMDE_MM_EXCEPT_INVALID (0x0001) -#endif -#if defined(_MM_EXCEPT_DENORM) -# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM -#else -# define SIMDE_MM_EXCEPT_DENORM (0x0002) -#endif -#if defined(_MM_EXCEPT_DIV_ZERO) -# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO -#else -# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) -#endif -#if defined(_MM_EXCEPT_OVERFLOW) -# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW -#else -# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) -#endif -#if defined(_MM_EXCEPT_UNDERFLOW) -# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW -#else -# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) -#endif -#if defined(_MM_EXCEPT_INEXACT) -# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT -#else -# define SIMDE_MM_EXCEPT_INEXACT (0x0020) -#endif -#if defined(_MM_EXCEPT_MASK) -# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK -#else -# define SIMDE_MM_EXCEPT_MASK \ - (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ - SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ - SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID - #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM - #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO - #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW - #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW - #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT - #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK -#endif - -#if defined(_MM_MASK_INVALID) -# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID -#else -# define SIMDE_MM_MASK_INVALID (0x0080) -#endif -#if defined(_MM_MASK_DENORM) -# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM -#else -# define SIMDE_MM_MASK_DENORM (0x0100) -#endif -#if defined(_MM_MASK_DIV_ZERO) -# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO -#else -# define 
SIMDE_MM_MASK_DIV_ZERO (0x0200) -#endif -#if defined(_MM_MASK_OVERFLOW) -# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW -#else -# define SIMDE_MM_MASK_OVERFLOW (0x0400) -#endif -#if defined(_MM_MASK_UNDERFLOW) -# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW -#else -# define SIMDE_MM_MASK_UNDERFLOW (0x0800) -#endif -#if defined(_MM_MASK_INEXACT) -# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT -#else -# define SIMDE_MM_MASK_INEXACT (0x1000) -#endif -#if defined(_MM_MASK_MASK) -# define SIMDE_MM_MASK_MASK _MM_MASK_MASK -#else -# define SIMDE_MM_MASK_MASK \ - (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ - SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ - SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID - #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM - #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO - #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW - #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW - #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT - #define _MM_MASK_MASK SIMDE_MM_MASK_MASK -#endif - -#if defined(_MM_FLUSH_ZERO_MASK) -# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK -#else -# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_ON) -# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON -#else -# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_OFF) -# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF -#else -# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK - #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON - #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_ROUNDING_MODE(void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _MM_GET_ROUNDING_MODE(); - #elif defined(SIMDE_HAVE_FENV_H) - unsigned int vfe_mode; - - switch (fegetround()) { - #if defined(FE_TONEAREST) - case FE_TONEAREST: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case FE_TOWARDZERO: - vfe_mode = SIMDE_MM_ROUND_DOWN; - break; - #endif - - #if defined(FE_UPWARD) - case FE_UPWARD: - vfe_mode = SIMDE_MM_ROUND_UP; - break; - #endif - - #if defined(FE_DOWNWARD) - case FE_DOWNWARD: - vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; - break; - #endif - - default: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - } - - return vfe_mode; - #else - return SIMDE_MM_ROUND_NEAREST; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_ROUNDING_MODE(a); - #elif defined(SIMDE_HAVE_FENV_H) - int fe_mode = FE_TONEAREST; - - switch (a) { - #if defined(FE_TONEAREST) - case SIMDE_MM_ROUND_NEAREST: - fe_mode = FE_TONEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case SIMDE_MM_ROUND_TOWARD_ZERO: - fe_mode = FE_TOWARDZERO; - break; - #endif - - #if defined(FE_DOWNWARD) - case SIMDE_MM_ROUND_DOWN: - fe_mode = FE_DOWNWARD; - break; - #endif - - #if defined(FE_UPWARD) - case SIMDE_MM_ROUND_UP: - fe_mode = FE_UPWARD; - break; - #endif - - default: - return; - } - - fesetround(fe_mode); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. 
*/ - #if \ - defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_f32 = vrndiq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndnq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndmq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); - #elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndpq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); - #elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndq_f32(a_.neon_f32); 
- #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); - #elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) -#else - #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps(e3, e2, e1, e0); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; - r_.neon_f32 = vld1q_f32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps1 (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps1(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - (void) a; - return vec_splats(a); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - return (simde__m128)__lsx_vldrepl_w(&a, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_splat(a); - #else - return simde_mm_set_ps(a, a, a, a); - #endif -} -#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps1(a) simde_mm_set_ps1(a) -# define _mm_set1_ps(a) simde_mm_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_move_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_move_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; - r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); - #else - r_.f32[0] = b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_broadcastlow_ps(simde__m128 a) { - /* This function broadcasts the first element in the input vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_ss functions since there may be garbage in the upper lanes. */ - - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_shuffle_ps(a, a, 0); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - // the upper values in the result must be the remnants of <a>. 
- r_.neon_f32 = vaddq_f32(a_.neon_f32, value); - #else - r_.f32[0] = a_.f32[0] + b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_and_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_and_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_andnot_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_xor_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_xor_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_or_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_or_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_not_ps(simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* Note: we use ints instead of floats because we don't want cmpeq - * to return false for (NaN, NaN) */ - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - mask_ = simde__m128_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint32_t wa SIMDE_VECTOR(16); - uint32_t wb SIMDE_VECTOR(16); - uint32_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) -# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint16_t wa SIMDE_VECTOR(16); - uint16_t wb SIMDE_VECTOR(16); - uint16_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) -# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_abs_ps(simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - simde_float32 mask_; - uint32_t u32_ = UINT32_C(0x7FFFFFFF); - simde_memcpy(&mask_, &u32_, sizeof(u32_)); - return _mm_and_ps(_mm_set1_ps(mask_), a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vabsq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_abs(a_.altivec_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpge_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpge_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpgt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpgt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vandq_u32(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpunord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpunord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] == b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] >= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] > b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] <= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] < b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] != b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { - simde__m128_private - r_, - dest_ = simde__m128_to_private(dest), - src_ = simde__m128_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); - r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t sign_pos = wasm_f32x4_splat(-0.0f); - r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); - #else - r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); - r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; - r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); - #elif defined(SIMDE_IEEE754_STORAGE) - (void) src_; - (void) dest_; - simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); - r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { - return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_pi2ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); - #else - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_si2ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - r_.i32[1] = a_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvt_ss2si (simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_ss2si(a); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); - #else - simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.i16[i]; - r_.f32[i] = v; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32x2_ps(a, b); - #else - simde__m128_private r_; - simde__m64_private - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); - SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); - #else - r_.f32[0] = (simde_float32) a_.i32[0]; - r_.f32[1] = (simde_float32) a_.i32[1]; - r_.f32[2] = (simde_float32) b_.i32[0]; - r_.f32[3] = (simde_float32) b_.i32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_cvtpi8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); - r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); - r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi16 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi16(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi32(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi8 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi8(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) - /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to - * i16, combine with an all-zero vector of i16 (which will become the upper - * half), narrow to i8. 
*/ - float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); - float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); - float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); - r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) - r_.i8[i] = INT8_MAX; - else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) - r_.i8[i] = INT8_MIN; - else - r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); - } - /* Note: the upper half is undefined */ - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.u16[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtsi32_ss(a, b); - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_ss(a, b); - #else - return _mm_cvtsi64x_ss(a, b); - #endif - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - 
return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm_cvtss_f32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtss_f32(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_f32(a_.neon_f32, 0); - #else - return a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtss_si32 (simde__m128 a) { - return simde_mm_cvt_ss2si(a); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtss_si64(a); - #else - return _mm_cvtss_si64x(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); - #else - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) -# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtt_ss2si (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtt_ss2si(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - simde_float32 v = a_.f32[0]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, v); - #endif - #endif - #endif -} -#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) -# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) - #if defined(__PGI) - return _mm_cvttss_si64x(a); - #else - return _mm_cvttss_si64(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); - float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); - r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) - r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = a_.f32[0] / b_.f32[0]; - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_mm_extract_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private a_ = simde__m64_to_private(a); - return a_.i16[imm8]; -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) -#endif -#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) -# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private - a_ = simde__m64_to_private(a); - - a_.i16[imm8] = i; - - return simde__m64_from_private(a_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) -#endif -#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps(mem_addr); -#else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); - #endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) -#endif - 
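Every polyfill being removed here follows the same dispatch shape: return the native x86 intrinsic when it is available, otherwise fall back to NEON/AltiVec/WASM/LSX lane operations, and finally to a plain scalar loop or memcpy. A minimal sketch of that pattern for an unaligned 4-float load (hypothetical names f32x4 and load4_f32, not taken from the deleted header or from this package):

// Hypothetical illustration of the native-vs-portable dispatch used by the
// polyfills in this file; not part of the removed header.
#include <cstring>
#if defined(__SSE__)
  #include <xmmintrin.h>      // native x86 path
#elif defined(__ARM_NEON)
  #include <arm_neon.h>       // native ARM path
#endif

struct f32x4 { float v[4]; };

static inline f32x4 load4_f32(const float* mem_addr) {
  f32x4 r;
#if defined(__SSE__)
  _mm_storeu_ps(r.v, _mm_loadu_ps(mem_addr));   // unaligned SSE load
#elif defined(__ARM_NEON)
  vst1q_f32(r.v, vld1q_f32(mem_addr));          // NEON load/store of 4 floats
#else
  std::memcpy(r.v, mem_addr, sizeof(r.v));      // portable scalar fallback
#endif
  return r;
}

Each removed simde_mm_* definition below repeats this structure, usually with additional backend branches (AltiVec, WASM SIMD128, LoongArch LSX) ahead of the scalar loop.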
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load1_ps (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps1(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_dup_f32(mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); - #else - r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ss (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ss(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); - #else - r_.f32[0] = *mem_addr; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); - #else - simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -/* The SSE documentation says that there are no alignment requirements - for mem_addr. Unfortunately they used the __m64 type for the argument - which is supposed to be 8-byte aligned, so some compilers (like clang - with -Wcast-align) will generate a warning if you try to cast, say, - a simde_float32* to a simde__m64* for this function. - - I think the choice of argument type is unfortunate, but I do think we - need to stick to it here. 
If there is demand I can always add something - like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vld1_f32( - HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); - #else - simde__m64_private b_; - simde_memcpy(&b_, mem_addr, sizeof(b_)); - r_.i32[0] = b_.i32[0]; - r_.i32[1] = b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadr_ps(mem_addr); - #else - simde__m128_private - r_, - v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrev64q_f32(v_.neon_f32); - r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_reve(v_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); - #else - r_.f32[0] = v_.f32[3]; - r_.f32[1] = v_.f32[2]; - r_.f32[2] = v_.f32[1]; - r_.f32[3] = v_.f32[0]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadu_ps(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m64_private - a_ = simde__m64_to_private(a), - mask_ = simde__m64_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) - if (mask_.i8[i] < 0) - mem_addr[i] = a_.i8[i]; - #endif -} -#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) -# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) - r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); - #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) - r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) -# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) -# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - #if defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); - #else - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); - #endif - #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); - r_.f32 = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.f32), - ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | - (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) - ) - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) -# define _m_pminub(a, b) simde_mm_min_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movehl_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vzip2q_u64(b_.neon_u64, a_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a32 = vget_high_f32(a_.neon_f32); - float32x2_t b32 = vget_high_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(b32, a32); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergel(b_.altivec_i64, a_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); - #else - r_.f32[0] = b_.f32[2]; - r_.f32[1] = b_.f32[3]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movelh_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = 
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] == b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] == b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] >= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] >= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] > b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] > b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] <= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] <= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] < b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] < b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] != b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] != b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) -# if defined(__has_builtin) -# if __has_builtin(__builtin_ia32_undef128) -# define SIMDE_HAVE_UNDEFINED128 -# endif -# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) -# define SIMDE_HAVE_UNDEFINED128 -# endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpackhi_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_high_f32(a_.neon_f32); - float32x2_t b1 = vget_high_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = 
__lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpacklo_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_low_f32(a_.neon_f32); - float32x2_t b1 = vget_low_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) || \ - defined(SIMDE_VECTOR_SUBSCRIPT)) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private a_ = simde__m64_to_private(a); - vst1_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), a_.neon_i64); - #else - simde__m64_private* - dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), - a_ = simde__m64_to_private(a); - - dest->i64[0] = a_.i64[0]; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || defined(SIMDE_LOONGARCH_LSX_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_ASSUME_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_ps(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_ps(mem_addr, a) 
simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ - row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - } while (0) -#else - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ - SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ - row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ - row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ - row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ - row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ - } while (0) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE_H) */ -/* :: End simde/x86/sse.h :: */ -#if !defined(SIMDE_X86_AVX_H) -#define SIMDE_X86_AVX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_SSE4_2_H) -#define SIMDE_X86_SSE4_2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#if !defined(SIMDE_X86_SSE4_1_H) -#define SIMDE_X86_SSE4_1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/ssse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSSE3_H) -#define SIMDE_X86_SSSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSE3_H) -#define SIMDE_X86_SSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. 
Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - * 2017 Hasindu Gamaarachchi - * 2018 Jeff Daily - */ - -#if !defined(SIMDE_X86_SSE2_H) -#define SIMDE_X86_SSE2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128i n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - #if defined(__ARM_FP16_FORMAT_IEEE) - SIMDE_ALIGN_TO_16 float16x8_t neon_f16; - #endif - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - 
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128i_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128d n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 
msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128d_private; - -#if defined(SIMDE_X86_SSE2_NATIVE) - typedef __m128i simde__m128i; - typedef __m128d simde__m128d; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int64x2_t simde__m128i; -# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - typedef float64x2_t simde__m128d; -# elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -# else - typedef simde__m128d_private simde__m128d; -# endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128i; - typedef v128_t simde__m128d; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; - #else - typedef simde__m128d_private simde__m128d; - #endif -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128i_private simde__m128i; - typedef simde__m128d_private simde__m128d; -#endif - -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - typedef simde__m128i __m128i; - typedef simde__m128d __m128d; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde__m128i_from_private(simde__m128i_private v) { - simde__m128i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i_private -simde__m128i_to_private(simde__m128i v) { - simde__m128i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde__m128d_from_private(simde__m128d_private v) { - simde__m128d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d_private -simde__m128d_to_private(simde__m128d v) { - simde__m128d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, 
f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(double) - simde__m128d_to_altivec_f64(simde__m128d value) { - simde__m128d_private r_ = simde__m128d_to_private(value); - return r_.altivec_f64; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { - simde__m128d_private r_; - r_.altivec_f64 = value; - return simde__m128d_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) - #endif - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_pd(e1, e0); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make(e0, e1); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; - r_.neon_f64 = vld1q_f64(data); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_pd(a); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_pd(a) simde_mm_set1_pd(a) - #define _mm_set_pd1(a) simde_mm_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_abs_pd(simde__m128d a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - simde_float64 mask_; - uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); - simde_memcpy(&mask_, &u64_, sizeof(u64_)); - return _mm_and_pd(_mm_set1_pd(mask_), a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vabsq_f64(a_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_abs(a_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_not_pd(simde__m128d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castpd_si128(a); - return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
[... the remainder of this hunk continues the wholesale, deletion-only removal (every line `-`-prefixed) of the bundled SIMDE SSE2 compatibility shims. The removed span covers the portable emulations of _mm_blendv_pd, _mm_add_epi8/16/32/64, _mm_add_pd/sd/si64, _mm_adds_epi8/epi16/epu8/epu16, _mm_and_pd/si128, _mm_andnot_pd/si128, _mm_xor_pd, _mm_avg_epu8/epu16, _mm_setzero_si128, _mm_bslli/bsrli_si128 (and their _mm_slli/srli_si128 aliases), _mm_clflush, _mm_move_sd, the _mm_comieq/comige/comigt/comile/comilt/comineq_sd scalar comparisons, the cast helpers (_mm_castpd_ps, _mm_castpd_si128, _mm_castps_pd, _mm_castps_si128, _mm_castsi128_pd, _mm_castsi128_ps), internal helpers simde_x_mm_broadcastlow_pd, simde_x_mm_copysign_pd, and simde_x_mm_xorsign_pd, the compare family (_mm_cmpeq/cmpneq/cmplt/cmple/cmpgt/cmpge and the negated _mm_cmpngt/cmpnge/cmpnlt/cmpnle plus _mm_cmpord/cmpunord, for epi8/16/32, pd, and sd), _mm_cvtsd_f64, _mm_cvtepi32_pd, _mm_cvtepi32_ps, and _mm_cvtpd_pi32. Each shim carries NEON, AltiVec/VSX, WASM-SIMD, vector-extension, and scalar fallback paths plus its SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES #define, and all of it is deleted verbatim ...]
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) - return _mm_cvtpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvtpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) - float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; - r_.f32 = - __builtin_shufflevector( - __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, - 0, 1, 2, 3 - ); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); - r_.f32[2] = SIMDE_FLOAT32_C(0.0); - r_.f32[3] = SIMDE_FLOAT32_C(0.0); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtpi32_pd (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_pd(a); - #else - simde__m128d_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) - a_ = simde__m128_to_private(a); - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - #else - a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_pd(a); - #else - simde__m128d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 v = simde_math_round(a_.f64[0]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsd_si64x(a); - #else - return _mm_cvtsd_si64(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); - #endif -} -#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) - #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); - - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i]; - } - #endif - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_x_mm_cvtsi128_si16 (simde__m128i a) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s16(a_.neon_i16, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i16, 0); - #else - return a_.i16[0]; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi128_si32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi128_si32(a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s32(a_.neon_i32, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i32, 0); - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsi128_si64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsi128_si64x(a); - #else - return _mm_cvtsi128_si64(a); - #endif - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); - #endif - return a_.i64[0]; - #endif -} -#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) - #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_sd(a, b); - #else - simde__m128d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.i64[1] = a_.i64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cvtsi16_si128 (int16_t a) { - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); - #else - r_.i16[0] = a; - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - r_.i16[4] = 0; - r_.i16[5] = 0; - r_.i16[6] = 0; - r_.i16[7] = 0; - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi32_si128 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_si128(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_sd(a, b); - #else - return _mm_cvtsi64x_sd(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) - #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi64_si128 (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_si128(a); - #else - return _mm_cvtsi64x_si128(a); - #endif - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i64x2_make(a, 0); - #else - r_.i64[0] = a; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) - #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtss_sd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); - return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); - - return simde__m128d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvttpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvttpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = a_.f64[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvttpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - /* Values below INT32_MIN saturate anyways, so we don't need to - * test for that. 
*/ - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = - vandq_u32( - vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), - vceqq_f32(a_.neon_f32, a_.neon_f32) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); - #endif - - r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - v128_t valid_input = - wasm_v128_and( - wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), - wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); - #endif - - r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); - #endif - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; - - __typeof__(r_.i32) valid_input = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.i32), - (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) - ); - #elif !defined(SIMDE_FAST_NANS) - __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); - #endif - - __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; - r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); - #endif - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvttsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvttsd_si64(a); - #else - return _mm_cvttsd_si64x(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); - #endif -} -#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) - #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] / b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - uint16_t r; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); - #else - r = a_.u16[imm8 & 7]; - #endif - - return HEDLEY_STATIC_CAST(int32_t, r); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) - #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m128i_private a_ = simde__m128i_to_private(a); - a_.i16[imm8 & 7] = i; - return simde__m128i_from_private(a_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load1_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load1_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); - #else - return simde_mm_set1_pd(*mem_addr); - #endif -} -#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) - #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_sd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_sd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = UINT64_C(0); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_load_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadh_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); - #else - simde_float64 t; - - simde_memcpy(&t, mem_addr, sizeof(t)); - r_.f64[0] = a_.f64[0]; - r_.f64[1] = t; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_epi64(mem_addr); - #else - simde__m128i_private r_; - - int64_t value; - simde_memcpy(&value, mem_addr, sizeof(value)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); - #else - r_.i64[0] = value; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vld1_f64( - HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = a_.u64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadr_pd(mem_addr); - #else - simde__m128d_private - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); - r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t tmp = 
wasm_v128_load(mem_addr); - r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); - #else - r_.f64[0] = mem_addr[1]; - r_.f64[1] = mem_addr[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld1q_f64(mem_addr); - #else - simde__m128d_private r_; - - simde_memcpy(&r_, mem_addr, sizeof(r_)); - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi8 - #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi16 - #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi32(void const * mem_addr) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi32 - #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi64 - #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si128 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); - #else - simde__m128i_private r_; - - #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_PACKED_ - struct simde_mm_loadu_si128_s { - __typeof__(r_) v; - } __attribute__((__packed__, __may_alias__)); - r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_madd_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpaddq_s32(pl, ph); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); - int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); - int32x2_t rh = 
vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); - r_.neon_i32 = vcombine_s32(rl, rh); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) a32, b32, p32; - SIMDE_CONVERT_VECTOR_(a32, a_.i16); - SIMDE_CONVERT_VECTOR_(b32, b_.i16); - p32 = a32 * b32; - r_.i32 = - __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + - __builtin_shufflevector(p32, p32, 1, 3, 5, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - if (mask_.u8[i] & 0x80) { - mem_addr[i] = a_.i8[i]; - } - } - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) - /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ - return _mm_movemask_epi8(a); - #else - int32_t r = 0; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ - static const uint8_t md[16] = { - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - }; - - /* Extend sign bit over entire lane */ - uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); - /* Clear all but the bit we're interested in. 
*/ - uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); - /* Alternate bytes from low half and high half */ - uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); - uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vaddvq_u16(x); - #else - uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[15 - i] >> 7) << (15 - i); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_movemask_pd(a); - #else - int32_t r = 0; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + - (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 
-simde_mm_movepi64_pi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movepi64_pi64(a); - #else - simde__m64_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i64 = vget_low_s64(a_.neon_i64); - #else - r_.i64[0] = a_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_movpi64_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movpi64_epi64(a); - #else - simde__m128i_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) -#endif -
[... remainder of the deleted vendored SIMDE SSE2 emulation header elided: the scalar/NEON/AltiVec/WASM fallback implementations for the simde_mm_* functions from simde_mm_move_epi64 through simde_mm_srli_epi16 (multiply, pack, pause, sad, set/set1/setr/setzero/undefined, loadu_si16/32/64, shuffle/shufflehi/shufflelo, sll/srl/sra, slli/srai and sqrt helpers), together with their SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES "_mm_*" alias macros, all removed as "-" lines in this diff; the deletion resumes below at simde_mm_srli_epi32 ...]
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u32( \ - ((imm8) > 31) ? \ - vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ - vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - ((imm8 < 32) ?
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sr(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); - #else - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u64( \ - ((imm8) > 63) ? \ - vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ - vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store1_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); - #else - mem_addr[0] = a_.f64[0]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) - #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_sd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); - simde_memcpy(mem_addr, &v, sizeof(v)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); - simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde_float64 v = a_.f64[0]; - simde_memcpy(mem_addr, &v, sizeof(simde_float64)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void - simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeh_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - *mem_addr = a_.f64[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int64_t tmp; - - /* memcpy to prevent aliasing, tmp because we can't take the - * address of a vector element. */ - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - tmp = vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - tmp = vec_extract(a_.altivec_i64, 0); - #else - tmp = a_.i64[0]; - #endif - - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_pd(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 tmp; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - tmp = vgetq_lane_f64(a_.neon_f64, 0); - #else - tmp = a_.f64[0]; - #endif - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storer_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #else - mem_addr[0] = a_.f64[1]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si16(mem_addr, a); - #else - int16_t val = simde_x_mm_cvtsi128_si16(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si32(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); - #else - int32_t val = simde_mm_cvtsi128_si32(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si64(mem_addr, a); - #else - int64_t val = simde_mm_cvtsi128_si64(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_pd(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_si128(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-void -simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_si32(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_s32(mem_addr, vdupq_n_s32(a), 0); - #else - *mem_addr = a; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) - _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s64(mem_addr, vdup_n_s64(a)); - #else - *mem_addr = a; - #endif -} -#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) - #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] - b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m64 -simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] - b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] == b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] == b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] >= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] >= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > 
wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] > b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] > b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] <= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] <= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] < b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] < b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] != b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] != b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_lfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_lfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_lfence() simde_mm_lfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_mfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_mfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mfence() simde_mm_mfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_high_s16(a_.neon_i16); - int16x4_t b1 = vget_high_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi16(a, b) 
simde_mm_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_high_s32(a_.neon_i32); - int32x2_t b1 = vget_high_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_h = vget_high_s64(a_.neon_i64); - int64x1_t b_h = vget_high_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_h, b_h); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi8 (simde__m128i a, 
simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i]; - r_.i8[(i * 2) + 1] = b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_low_s16(a_.neon_i16); - int16x4_t b1 = vget_low_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i]; - r_.i16[(i * 2) + 1] = b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_low_s32(a_.neon_i32); - int32x2_t b1 = vget_low_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i]; - r_.i32[(i * 2) + 1] = b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_l = vget_low_s64(a_.neon_i64); - int64x1_t b_l = vget_low_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_l, b_l); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i]; - r_.i64[(i * 2) + 1] = b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i]; - r_.f64[(i * 2) + 1] = b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_negate_pd(simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - r_.altivec_f64 = vec_neg(a_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vnegq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); - #elif 
[Deleted vendored SIMDE code, condensed here for readability; every line of the original hunk is a "-" removal.]
[ * remainder of simde/x86/sse2.h -- the vector/scalar fallback tail of simde_mm_xor_si128 with its native alias, the simde_x_mm_not_si128 helper, the SIMDE_MM_SHUFFLE2 macro, and the file's closing declarations and include guard.]
[ * all of simde/x86/sse3.h -- deinterleave-even/odd helpers for epi16/epi32/ps/pd plus emulations of _mm_addsub_pd/ps, _mm_hadd_pd/ps, _mm_hsub_pd/ps, _mm_lddqu_si128, _mm_loaddup_pd, _mm_movedup_pd, _mm_movehdup_ps and _mm_moveldup_ps, each with native, NEON, AltiVec, WASM and scalar paths, followed by the file's closing declarations.]
[ * opening of simde/x86/ssse3.h -- diagnostic push/declarations and the start of the _mm_abs_epi8 emulation (continued below).]
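[Illustrative only, not part of the package: the horizontal-add intrinsic emulated by the deleted sse3.h shim sums adjacent lane pairs, taking the first two results from a and the last two from b. The helper name below is invented for the sketch; the semantics match the removed scalar fallback.]

    #include <array>

    // Scalar restatement of the removed _mm_hadd_ps fallback:
    // out = { a0+a1, a2+a3, b0+b1, b2+b3 }
    std::array<float, 4> hadd_ps_scalar(const std::array<float, 4>& a,
                                        const std::array<float, 4>& b) {
        return { a[0] + a[1], a[2] + a[3],
                 b[0] + b[1], b[2] + b[3] };
    }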
[ * remainder of simde/x86/ssse3.h -- emulations of _mm_abs_epi8/epi16/epi32 and _mm_abs_pi8/pi16/pi32, _mm_alignr_epi8/pi8, _mm_shuffle_epi8/pi8, the horizontal add/subtract family (_mm_hadd_epi16/epi32/pi16/pi32, _mm_hadds_epi16/pi16, _mm_hsub_epi16/epi32/pi16/pi32, _mm_hsubs_epi16/pi16), _mm_maddubs_epi16/pi16, _mm_mulhrs_epi16/pi16 and _mm_sign_epi8/epi16/epi32/pi8/pi16/pi32, each with native, NEON, AltiVec, WASM and scalar paths, followed by the file's closing declarations.]
[ * opening of simde/x86/sse4.1.h -- the SSE4.1 native-alias guard and the _mm_blend_epi16 and _mm_blend_pd emulations (continued below).]
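[Illustrative only, not part of the package: the removed _mm_sign_* fallbacks copy, negate, or zero each lane of a according to the sign of the matching lane of b. The function name below is invented for the sketch; the per-lane logic mirrors the deleted 16-bit scalar loop.]

    #include <cstdint>
    #include <cstddef>

    // Scalar restatement of the removed _mm_sign_epi16 fallback loop.
    void sign_epi16_scalar(int16_t r[8], const int16_t a[8], const int16_t b[8]) {
        for (std::size_t i = 0; i < 8; i++) {
            // negative b -> negate a; zero b -> zero; positive b -> pass through
            r[i] = (b[i] < 0) ? int16_t(-a[i])
                              : (b[i] != 0 ? a[i] : int16_t(0));
        }
    }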
b_.f64[i] : a_.f64[i]; - } - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_pd(a, b, imm8) \ - (__extension__ ({ \ - simde__m128d_private \ - simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ - simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ - simde_mm_blend_pd_r_; \ - \ - simde_mm_blend_pd_r_.f64 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 64, 16, \ - simde_mm_blend_pd_a_.f64, \ - simde_mm_blend_pd_b_.f64, \ - ((imm8) & (1 << 0)) ? 2 : 0, \ - ((imm8) & (1 << 1)) ? 3 : 1 \ - ); \ - \ - simde__m128d_from_private(simde_mm_blend_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_pd - #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_ps(a, b, imm8) \ - (__extension__ ({ \ - simde__m128_private \ - simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ - simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ - simde_mm_blend_ps_r_; \ - \ - simde_mm_blend_ps_r_.f32 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 32, 16, \ - simde_mm_blend_ps_a_.f32, \ - simde_mm_blend_ps_b_.f32, \ - ((imm8) & (1 << 0)) ? 4 : 0, \ - ((imm8) & (1 << 1)) ? 5 : 1, \ - ((imm8) & (1 << 2)) ? 6 : 2, \ - ((imm8) & (1 << 3)) ? 
7 : 3 \ - ); \ - \ - simde__m128_from_private(simde_mm_blend_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_ps - #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_epi8(a, b, mask); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); - return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Use a signed shift right to create a mask with the sign bit */ - mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); - r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); - #else - mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; - #endif - - r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int8_t m = mask_.i8[i] >> 7; - r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_epi8 - #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE2_NATIVE) - mask = simde_mm_srai_epi16(mask, 15); - return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); - r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i16 = mask_.i16 < z; - #else - mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; - #endif - - r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int16_t m = mask_.i16[i] >> 15; - r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - 
[Deleted hunk (vendored simde/x86/sse4.1.h, continued): the portable blend/select fallbacks simde_x_mm_blendv_epi32, simde_x_mm_blendv_epi64, simde_mm_blendv_pd, and simde_mm_blendv_ps; simde_mm_round_pd with its per-rounding-mode cases (current direction, nearest, toward negative/positive infinity, toward zero); the simde_mm_ceil_pd/_ps/_sd/_ss wrappers; simde_mm_cmpeq_epi64; and the opening of the sign-extending conversions (simde_mm_cvtepi8_epi16, start of simde_mm_cvtepi8_epi32). Each function uses the native SSE4.1 intrinsic when available and otherwise falls back to NEON, WASM SIMD128, AltiVec/z-vector, GCC vector extensions, or a plain scalar loop.]
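For reference, a minimal standalone sketch (not part of the diff; the helper name blendv_lane is hypothetical) of the per-lane select that the removed blendv fallbacks implement in their scalar path: a lane is taken from b when the corresponding mask lane's sign bit is set, otherwise from a.

#include <cstdint>
#include <cstdio>

// Per-lane select: take the lane from b when the mask lane's sign bit is set, else from a.
static int32_t blendv_lane(int32_t a, int32_t b, int32_t mask) {
    // Arithmetic right shift smears the sign bit across the whole lane (all-ones or
    // all-zeros), matching the removed fallback's "mask_.i32[i] >> 31" idiom.
    int32_t m = mask >> 31;
    return (m & b) | (~m & a);
}

int main() {
    const int32_t a[4]    = {1, 2, 3, 4};
    const int32_t b[4]    = {10, 20, 30, 40};
    const int32_t mask[4] = {0, -1, 0, -1};   // negative lanes select from b
    for (int i = 0; i < 4; ++i)
        std::printf("%d ", blendv_lane(a[i], b[i], mask[i]));   // prints: 1 20 3 40
    std::printf("\n");
}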
[Deleted hunk, continued: the widening-conversion fallbacks simde_mm_cvtepi8_epi32/_epi64, simde_mm_cvtepu8_epi16/_epi32/_epi64, simde_mm_cvtepi16_epi32/_epi64, simde_mm_cvtepu16_epi32/_epi64, simde_mm_cvtepi32_epi64, and simde_mm_cvtepu32_epi64, each sign- or zero-extending the low lanes via the native intrinsic, an SSE2 unpack-and-shift emulation, NEON vmovl, WASM extend, shuffle-vector tricks, or a scalar loop; followed by the opening of simde_mm_dp_pd.]
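The SSE2 emulation paths in the deleted conversions lean on one idiom: _mm_unpacklo_epi8(a, a) duplicates each byte into both halves of a 16-bit lane, and an arithmetic right shift by 8 then sign-extends it. A standalone scalar sketch (hypothetical helper name, no intrinsics) of why that works:

#include <cstdint>
#include <cstdio>

// Duplicate the byte into both halves of a 16-bit value, then arithmetic-shift right by 8:
// the shift drags copies of the sign bit in from the top, yielding the sign-extended value.
static int16_t sign_extend_i8_via_shift(int8_t v) {
    const uint8_t  byte = static_cast<uint8_t>(v);
    const uint16_t dup  = static_cast<uint16_t>((byte << 8) | byte);
    // Arithmetic right shift of a signed value assumed, as in the removed fallback.
    return static_cast<int16_t>(static_cast<int16_t>(dup) >> 8);
}

int main() {
    const int8_t samples[] = {5, -5, -128, 127};
    for (int8_t v : samples)
        std::printf("%4d -> %6d\n", v, sign_extend_i8_via_shift(v));  // equals (int16_t)v
}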
[Deleted hunk, continued: the remainder of simde_mm_dp_pd and all of simde_mm_dp_ps (imm8-masked multiply, horizontal sum, masked broadcast of the result); the lane-extraction helpers simde_mm_extract_epi8/_epi32/_epi64 and simde_mm_extract_ps; the simde_mm_floor_pd/_ps/_sd/_ss wrappers; and the lane-insertion helpers simde_mm_insert_epi8/_epi32/_epi64.]
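A standalone sketch (hypothetical helper name dp_ps_scalar, no intrinsics) of what the removed simde_mm_dp_ps scalar fallback computes: the high nibble of imm8 selects which per-lane products enter the sum, and the low nibble selects which output lanes receive that sum (the rest become zero).

#include <cstdio>

static void dp_ps_scalar(const float a[4], const float b[4], int imm8, float r[4]) {
    float sum = 0.0f;
    for (int i = 0; i < 4; ++i)                    // high nibble: which products to accumulate
        if ((imm8 >> (i + 4)) & 1) sum += a[i] * b[i];
    for (int i = 0; i < 4; ++i)                    // low nibble: which lanes receive the sum
        r[i] = ((imm8 >> i) & 1) ? sum : 0.0f;
}

int main() {
    float a[4] = {1, 2, 3, 4}, b[4] = {5, 6, 7, 8}, r[4];
    dp_ps_scalar(a, b, 0xFF, r);   // full 4-lane dot product, broadcast to all lanes
    std::printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);   // 70 70 70 70
}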
[Deleted hunk, continued: the simde_mm_insert_epi64 native-alias macros and simde_mm_insert_ps; the min/max family simde_mm_max_epi8/_epi32/_epu16/_epu32 and simde_mm_min_epi8/_epi32/_epu16/_epu32; simde_mm_minpos_epu16; simde_mm_mpsadbw_epu8; simde_mm_mul_epi32; simde_mm_mullo_epi32; the internal simde_x_mm_mullo_epu32 helper; and the start of simde_mm_packus_epi32.]
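The removed SSE2 fallbacks for _mm_max_epu16 and _mm_min_epu16 rely on an unsigned saturating-subtraction identity (the deleted code links simd-everywhere/simde issue #855 for it): max(a, b) = b + sat_sub(a, b) and min(a, b) = a - sat_sub(a, b). A standalone scalar sketch (hypothetical helper names):

#include <cstdint>
#include <cstdio>

// Saturating unsigned subtraction: (a > b) ? a - b : 0, as _mm_subs_epu16 does per lane.
static uint16_t sat_sub_u16(uint16_t a, uint16_t b) { return a > b ? uint16_t(a - b) : uint16_t(0); }
static uint16_t max_u16(uint16_t a, uint16_t b)     { return uint16_t(b + sat_sub_u16(a, b)); }
static uint16_t min_u16(uint16_t a, uint16_t b)     { return uint16_t(a - sat_sub_u16(a, b)); }

int main() {
    std::printf("%u %u\n", max_u16(70, 40000), min_u16(70, 40000));   // 40000 70
    std::printf("%u %u\n", max_u16(65535, 1), min_u16(65535, 1));     // 65535 1
}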
[Deleted hunk, continued: the saturating pack simde_mm_packus_epi32; the scalar rounding wrappers simde_mm_round_sd and simde_mm_round_ss; simde_mm_stream_load_si128; and the test predicates simde_mm_test_all_ones, simde_mm_test_all_zeros, simde_mm_test_mix_ones_zeros, simde_mm_testc_si128, simde_mm_testnzc_si128, and simde_mm_testz_si128. The sse4.1.h portion then closes (SIMDE_END_DECLS_, HEDLEY_DIAGNOSTIC_POP, the SIMDE_X86_SSE4_1_H include guard), and the deletion runs on into the vendored SSE4.2 portion: the ARM ACLE include, the diagnostic-push/begin-decls preamble, and the SIMDE_SIDD_* string-compare constants (aliased to the native _SIDD_* values when SIMDE_X86_SSE4_2_NATIVE is defined, otherwise given their 0x00-0x40 literal values). The hunk is cut off partway through the _SIDD_* native-alias #define block.]
_SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED - #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY - #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY - #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY - #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY - #define _SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT - #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT - #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK - #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ - _mm_cmpestrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrs - #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 
16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ - _mm_cmpestrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrz - #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_cmpgt_epi64(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://stackoverflow.com/a/65175746/501126 */ - __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); - r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); - return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://stackoverflow.com/a/65223269/501126 */ - r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpgt_epi64 - #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_8_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 8) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i8[i]) - a_invalid = 1; - } - return a_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_16_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 16) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i16[i]) - a_invalid = 1; - } - return a_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrs(a, b, imm8) \ - _mm_cmpistrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrs(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? 
simde_mm_cmpistrs_16_((a)) \ - : simde_mm_cmpistrs_8_((a))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrs - #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_8_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 8) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i8[i]) - b_invalid = 1; - } - return b_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_16_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 16) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i16[i]) - b_invalid = 1; - } - return b_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrz(a, b, imm8) \ - _mm_cmpistrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrz(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? simde_mm_cmpistrz_16_((b)) \ - : simde_mm_cmpistrz_8_((b))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrz - #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u8(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cb(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc ^= v; - for(int bit = 0 ; bit < 8 ; bit++) { - if (crc & 1) - crc = (crc >> 1) ^ UINT32_C(0x82f63b78); - else - crc = (crc >> 1); - } - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u16(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32ch(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u8(crc, v & 0xff); - crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u32(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cw(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u16(crc, v & 0xffff); - crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) - return _mm_crc32_u64(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return 
__crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); - #else - uint64_t crc = prevcrc; - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_2_H) */ -/* :: End simde/x86/sse4.2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; - SIMDE_ALIGN_TO_32 simde__m128 m128[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256 n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} 
simde__m256_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; - SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256d n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256d_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 
SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_32 simde_float16 f16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 simde_float16 f16[16]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float16 f16[16]; - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; - SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256i n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256i_private; - -#if defined(SIMDE_X86_AVX_NATIVE) - typedef __m256 simde__m256; - typedef __m256i simde__m256i; - typedef __m256d simde__m256d; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; -#else - typedef simde__m256_private simde__m256; - typedef simde__m256i_private simde__m256i; - typedef simde__m256d_private simde__m256d; -#endif - -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) - typedef simde__m256 __m256; - typedef simde__m256i __m256i; - typedef simde__m256d __m256d; - #else - #undef __m256 - #define __m256 simde__m256 - #undef __m256i - #define __m256i simde__m256i - #undef __m256d - #define __m256d simde__m256d - #endif -#endif - -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); 
-HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde__m256_from_private(simde__m256_private v) { - simde__m256 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256_private -simde__m256_to_private(simde__m256 v) { - simde__m256_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde__m256i_from_private(simde__m256i_private v) { - simde__m256i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i_private -simde__m256i_to_private(simde__m256i v) { - simde__m256i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde__m256d_from_private(simde__m256d_private v) { - simde__m256d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d_private -simde__m256d_to_private(simde__m256d v) { - simde__m256d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_CMP_EQ_OQ 0 -#define SIMDE_CMP_LT_OS 1 -#define SIMDE_CMP_LE_OS 2 -#define SIMDE_CMP_UNORD_Q 3 -#define SIMDE_CMP_NEQ_UQ 4 -#define SIMDE_CMP_NLT_US 5 -#define SIMDE_CMP_NLE_US 6 -#define SIMDE_CMP_ORD_Q 7 -#define SIMDE_CMP_EQ_UQ 8 -#define SIMDE_CMP_NGE_US 9 -#define SIMDE_CMP_NGT_US 10 -#define SIMDE_CMP_FALSE_OQ 11 -#define SIMDE_CMP_NEQ_OQ 12 -#define SIMDE_CMP_GE_OS 13 -#define SIMDE_CMP_GT_OS 14 -#define SIMDE_CMP_TRUE_UQ 15 -#define SIMDE_CMP_EQ_OS 16 -#define SIMDE_CMP_LT_OQ 17 -#define SIMDE_CMP_LE_OQ 18 -#define SIMDE_CMP_UNORD_S 19 -#define SIMDE_CMP_NEQ_US 20 -#define SIMDE_CMP_NLT_UQ 21 -#define SIMDE_CMP_NLE_UQ 22 -#define SIMDE_CMP_ORD_S 23 -#define SIMDE_CMP_EQ_US 24 -#define SIMDE_CMP_NGE_UQ 25 -#define SIMDE_CMP_NGT_UQ 26 -#define SIMDE_CMP_FALSE_OS 27 -#define SIMDE_CMP_NEQ_OS 28 -#define SIMDE_CMP_GE_OQ 29 -#define SIMDE_CMP_GT_OQ 30 -#define SIMDE_CMP_TRUE_US 31 - -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) -#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ -#define _CMP_LT_OS SIMDE_CMP_LT_OS -#define _CMP_LE_OS SIMDE_CMP_LE_OS -#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q -#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ -#define _CMP_NLT_US SIMDE_CMP_NLT_US -#define _CMP_NLE_US SIMDE_CMP_NLE_US -#define _CMP_ORD_Q SIMDE_CMP_ORD_Q -#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ -#define _CMP_NGE_US SIMDE_CMP_NGE_US -#define _CMP_NGT_US SIMDE_CMP_NGT_US -#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ -#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ -#define _CMP_GE_OS SIMDE_CMP_GE_OS -#define _CMP_GT_OS SIMDE_CMP_GT_OS -#define _CMP_TRUE_UQ 
SIMDE_CMP_TRUE_UQ -#define _CMP_EQ_OS SIMDE_CMP_EQ_OS -#define _CMP_LT_OQ SIMDE_CMP_LT_OQ -#define _CMP_LE_OQ SIMDE_CMP_LE_OQ -#define _CMP_UNORD_S SIMDE_CMP_UNORD_S -#define _CMP_NEQ_US SIMDE_CMP_NEQ_US -#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ -#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ -#define _CMP_ORD_S SIMDE_CMP_ORD_S -#define _CMP_EQ_US SIMDE_CMP_EQ_US -#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ -#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ -#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS -#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS -#define _CMP_GE_OQ SIMDE_CMP_GE_OQ -#define _CMP_GT_OQ SIMDE_CMP_GT_OQ -#define _CMP_TRUE_US SIMDE_CMP_TRUE_US -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castps_pd (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps_pd(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps_pd - #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castps_si256 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps_si256(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps_si256 - #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castsi256_pd (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_pd(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_pd - #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castsi256_ps (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_ps(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_ps - #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castpd_ps (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd_ps(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd_ps - #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castpd_si256 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd_si256(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd_si256 - #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setzero_si256 (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_si256(); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_setzero_si128(); - r_.m128i[1] = simde_mm_setzero_si128(); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = 0; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_si256 - #define _mm256_setzero_si256() simde_mm256_setzero_si256() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 
-simde_mm256_setzero_ps (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_ps(); - #else - return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_ps - #define _mm256_setzero_ps() simde_mm256_setzero_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setzero_pd (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_pd(); - #else - return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_pd - #define _mm256_setzero_pd() simde_mm256_setzero_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_not_ps(simde__m256 a) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); - r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm256_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_ps(a, b, mask); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - mask_ = simde__m256_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); - r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_not_pd(simde__m256d a) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = ~a_.i64; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); - r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ~(a_.i64[i]); - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm256_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. 
- * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_pd(a, b, mask); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - mask_ = simde__m256d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); - r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_setone_si256 (void) { - simde__m256i_private r_; - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - __typeof__(r_.i32f) rv = { 0, }; - r_.i32f = ~rv; -#elif defined(SIMDE_X86_AVX2_NATIVE) - __m256i t = _mm256_setzero_si256(); - r_.n = _mm256_cmpeq_epi32(t, t); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - } -#endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_setone_ps (void) { - return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_setone_pd (void) { - return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, - int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, - int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi8( - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16); - #else - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - r_.i8[16] = e16; - r_.i8[17] = e17; - r_.i8[18] = e18; - r_.i8[19] = e19; - r_.i8[20] = e20; - r_.i8[21] = e21; - r_.i8[22] = e22; - r_.i8[23] = e23; - r_.i8[24] = e24; - r_.i8[25] = e25; - r_.i8[26] = e26; - r_.i8[27] = e27; - r_.i8[28] = e28; - r_.i8[29] = e29; - r_.i8[30] = e30; - r_.i8[31] = e31; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi8 - #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ 
- simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, - int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); - #else - r_.i16[ 0] = e0; - r_.i16[ 1] = e1; - r_.i16[ 2] = e2; - r_.i16[ 3] = e3; - r_.i16[ 4] = e4; - r_.i16[ 5] = e5; - r_.i16[ 6] = e6; - r_.i16[ 7] = e7; - r_.i16[ 8] = e8; - r_.i16[ 9] = e9; - r_.i16[10] = e10; - r_.i16[11] = e11; - r_.i16[12] = e12; - r_.i16[13] = e13; - r_.i16[14] = e14; - r_.i16[15] = e15; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi16 - #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, - int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); - #else - r_.i32[ 0] = e0; - r_.i32[ 1] = e1; - r_.i32[ 2] = e2; - r_.i32[ 3] = e3; - r_.i32[ 4] = e4; - r_.i32[ 5] = e5; - r_.i32[ 6] = e6; - r_.i32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi32 - #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi64x(e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi64x(e1, e0); - r_.m128i[1] = simde_mm_set_epi64x(e3, e2); - #else - r_.i64[0] = e0; - r_.i64[1] = e1; - r_.i64[2] = e2; - r_.i64[3] = e3; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi64x - #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, - uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, - uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, - uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, - uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, - uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, - uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m256i_private r_; - - r_.u8[ 0] = e0; - 
r_.u8[ 1] = e1; - r_.u8[ 2] = e2; - r_.u8[ 3] = e3; - r_.u8[ 4] = e4; - r_.u8[ 5] = e5; - r_.u8[ 6] = e6; - r_.u8[ 7] = e7; - r_.u8[ 8] = e8; - r_.u8[ 9] = e9; - r_.u8[10] = e10; - r_.u8[11] = e11; - r_.u8[12] = e12; - r_.u8[13] = e13; - r_.u8[14] = e14; - r_.u8[15] = e15; - r_.u8[16] = e16; - r_.u8[17] = e17; - r_.u8[18] = e18; - r_.u8[19] = e19; - r_.u8[20] = e20; - r_.u8[20] = e20; - r_.u8[21] = e21; - r_.u8[22] = e22; - r_.u8[23] = e23; - r_.u8[24] = e24; - r_.u8[25] = e25; - r_.u8[26] = e26; - r_.u8[27] = e27; - r_.u8[28] = e28; - r_.u8[29] = e29; - r_.u8[30] = e30; - r_.u8[31] = e31; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, - uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, - uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, - uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m256i_private r_; - - r_.u16[ 0] = e0; - r_.u16[ 1] = e1; - r_.u16[ 2] = e2; - r_.u16[ 3] = e3; - r_.u16[ 4] = e4; - r_.u16[ 5] = e5; - r_.u16[ 6] = e6; - r_.u16[ 7] = e7; - r_.u16[ 8] = e8; - r_.u16[ 9] = e9; - r_.u16[10] = e10; - r_.u16[11] = e11; - r_.u16[12] = e12; - r_.u16[13] = e13; - r_.u16[14] = e14; - r_.u16[15] = e15; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, - uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), - HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); - #else - r_.u32[ 0] = e0; - r_.u32[ 1] = e1; - r_.u32[ 2] = e2; - r_.u32[ 3] = e3; - r_.u32[ 4] = e4; - r_.u32[ 5] = e5; - r_.u32[ 6] = e6; - r_.u32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { - simde__m256i_private r_; - - r_.u64[0] = e0; - r_.u64[1] = e1; - r_.u64[2] = e2; - r_.u64[3] = e3; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); - r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - r_.f32[4] = e4; - r_.f32[5] = e5; - r_.f32[6] = e6; - r_.f32[7] = e7; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_ps - #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ - 
simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_pd(e3, e2, e1, e0); - #else - simde__m256d_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_set_pd(e1, e0); - r_.m128d[1] = simde_mm_set_pd(e3, e2); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - r_.f64[2] = e2; - r_.f64[3] = e3; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_pd - #define _mm256_set_pd(e3, e2, e1, e0) \ - simde_mm256_set_pd(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); - #else - simde__m256_private r_; - simde__m128_private - e1_ = simde__m128_to_private(e1), - e0_ = simde__m128_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128_private[0] = e0_; - r_.m128_private[1] = e1_; - #elif defined(SIMDE_HAVE_INT128_) - r_.i128[0] = e0_.i128[0]; - r_.i128[1] = e1_.i128[0]; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128 - #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); - #else - simde__m256d_private r_; - simde__m128d_private - e1_ = simde__m128d_to_private(e1), - e0_ = simde__m128d_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d_private[0] = e0_; - r_.m128d_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128d - #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); - #else - simde__m256i_private r_; - simde__m128i_private - e1_ = simde__m128i_to_private(e1), - e0_ = simde__m128i_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i_private[0] = e0_; - r_.m128i_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128i - #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi8(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi8(a); - r_.m128i[1] = simde_mm_set1_epi8(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - #endif - - return 
[Deleted hunk omitted: verbatim upstream SIMDE code from a vendored simde AVX compatibility header, removed in this change. The hunk deletes the portable shims for simde_mm256_set1_{epi8,epi16,epi32,epi64x,ps,pd}, the simde_x_mm256 deinterleave-even/odd and abs helpers, add/hadd/addsub, and/andnot, blend/blendv, broadcast, the 128/256-bit cast helpers, round/ceil/floor, the full _mm{,256}_cmp_{ps,pd,ss,sd} predicate switches, copysign, the cvtepi32/cvtpd/cvtps/cvtsd/cvtsi256/cvtss/cvttpd/cvttps conversions, div, extractf128, and the beginning of insert_epi8. None of this is project-authored code; it is upstream SIMDE dropped along with the rest of the vendored simde sources.]
(simde__m256i a, int8_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 31) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i8[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi8 - #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 15) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i16[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi16 - #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 7) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i32[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi32 - #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 3) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i64[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm256_insert_epi64 - #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256d_private a_ = simde__m256d_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - a_.m128d_private[imm8] = b_; - - return simde__m256d_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_pd - #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256_private a_ = simde__m256_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - 
a_.m128_private[imm8] = b_; - - return simde__m256_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_ps - #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - a_.m128i_private[imm8] = b_; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_si256 - #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) -#else -# define simde_mm256_dp_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ - simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_dp_ps - #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm256_extract_epi32 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 7) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i32[index]; -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi32 - #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm256_extract_epi64 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 3) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i64[index]; -} -#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) - #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) - #endif -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm256_extract_epi64 - #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_lddqu_si256 - #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_pd(mem_addr); - #else - simde__m256d r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); - 
return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_pd - #define _mm256_load_pd(a) simde_mm256_load_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_ps(mem_addr); - #else - simde__m256 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_ps - #define _mm256_load_ps(a) simde_mm256_load_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_load_si256 (simde__m256i const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_si256(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_si256 - #define _mm256_load_si256(a) simde_mm256_load_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_pd(a); - #else - simde__m256d r; - simde_memcpy(&r, a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_pd - #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_ps(a); - #else - simde__m256 r; - simde_memcpy(&r, a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_ps - #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi8 - #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) -#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi16 - #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi32(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi32 - #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi64 - #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_si256 (void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_si256 - #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - return _mm256_loadu2_m128(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), - simde_mm_loadu_ps(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128 - #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
- return _mm256_loadu2_m128d(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), - simde_mm_loadu_pd(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128d - #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - return _mm256_loadu2_m128i(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), - simde_mm_loadu_si128(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128i - #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); - #else - return _mm_maskload_pd(mem_addr, mask); - #endif - #else - simde__m128d_private r_; - simde__m128i_private - mask_ = simde__m128i_to_private(mask), - mask_shr_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde_mm_and_pd(simde_mm_load_pd(mem_addr), - simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { - mask_shr_.i64[i] = mask_.i64[i] >> 63; - } - #endif - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskload_pd - #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); - #else - return _mm256_maskload_pd(mem_addr, mask); - #endif - #else - simde__m256d_private r_; - simde__m256i_private mask_ = simde__m256i_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskload_pd - #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); - #else - return _mm_maskload_ps(mem_addr, mask); - #endif - #else - simde__m128_private r_; - simde__m128i_private - mask_ = simde__m128i_to_private(mask), - mask_shr_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde_mm_and_ps(simde_mm_load_ps(mem_addr), - simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { - mask_shr_.i32[i] = mask_.i32[i] >> 31; - } - #endif - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskload_ps - #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); - #else - return _mm256_maskload_ps(mem_addr, mask); - #endif - #else - simde__m256_private r_; - simde__m256i_private mask_ = simde__m256i_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskload_ps - #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); - #else - _mm_maskstore_pd(mem_addr, mask, a); - #endif - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) - mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) - mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if (mask_.u64[i] >> 63) - mem_addr[i] = a_.f64[i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_pd - #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); - #else - _mm256_maskstore_pd(mem_addr, mask, a); - #endif - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256d_private a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if (mask_.u64[i] & (UINT64_C(1) << 63)) - mem_addr[i] = a_.f64[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_pd - #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); - #else - _mm_maskstore_ps(mem_addr, mask, a); - #endif - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) - mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) - mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) - mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) - mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.f32[i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_ps - #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); - #else - _mm256_maskstore_ps(mem_addr, mask, a); - #endif - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256_private a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.f32[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_ps - #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_min_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_min_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_ps - #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_min_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_min_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_pd - #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_max_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_max_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_ps - #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_max_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_max_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_pd - #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_movedup_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movedup_pd(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movedup_pd - #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_movehdup_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movehdup_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movehdup_ps - #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_moveldup_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_moveldup_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); - #else - SIMDE_VECTORIZE 
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_moveldup_ps - #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_ps(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r |= (a_.u32[i] >> 31) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_ps - #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_pd(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_pd - #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_ps - #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_pd - #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_or_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; 
- #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_ps - #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_or_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] | b_.u64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_pd - #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute_ps (simde__m256 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_ps - #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_pd - #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permute_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permute_ps - #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permute_pd (simde__m128d a, const int imm8) - 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permute_pd - #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_permutevar_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make( - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[b_.i32[i] & 3]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permutevar_ps - #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_permutevar_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make( - (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), - (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permutevar_pd - #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_permutevar_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - simde__m256i_private b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar_ps - #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_permutevar_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - simde__m256i_private b_ = 
simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar_pd - #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); - r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_ps - #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); - r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_pd - #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); - r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_si256 - #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_rcp_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rcp_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); - r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_rcp_ps - #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_rsqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rsqrt_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_rsqrt_ps - #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi8 ( - int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi8( - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi8( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15, - e16, e17, e18, e19, e20, e21, e22, e23, - e24, e25, e26, e27, e28, e29, e30, e31); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi8 - #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi16 ( - int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi16( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi16( - e0, e1, e2, e3, e4, e5, e6, e7, - 
e8, e9, e10, e11, e12, e13, e14, e15); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi16 - #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi32 ( - int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi32 - #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi64x(e3, e2, e1, e0); - #else - return simde_mm256_set_epi64x(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi64x - #define _mm256_setr_epi64x(e3, e2, e1, e0) \ - simde_mm256_setr_epi64x(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_setr_ps ( - simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_ps - #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_pd(e3, e2, e1, e0); - #else - return simde_mm256_set_pd(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_pd - #define _mm256_setr_pd(e3, e2, e1, e0) \ - simde_mm256_setr_pd(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return _mm256_setr_m128(lo, hi); - #else - return simde_mm256_set_m128(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128 - #define _mm256_setr_m128(lo, hi) \ - simde_mm256_setr_m128(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return _mm256_setr_m128d(lo, hi); - #else - return simde_mm256_set_m128d(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128d - #define _mm256_setr_m128d(lo, hi) \ - simde_mm256_setr_m128d(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return 
_mm256_setr_m128i(lo, hi); - #else - return simde_mm256_set_m128i(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128i - #define _mm256_setr_m128i(lo, hi) \ - simde_mm256_setr_m128i(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; - r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; - r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; - r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; - r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_shuffle_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ - simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm256_shuffle_ps(a, b, imm8) \ - SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ - (((imm8) >> 0) & 3) + 0, \ - (((imm8) >> 2) & 3) + 0, \ - (((imm8) >> 4) & 3) + 8, \ - (((imm8) >> 6) & 3) + 8, \ - (((imm8) >> 0) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 12, \ - (((imm8) >> 6) & 3) + 12) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_ps - #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - r_.f64[0] = a_.f64[((imm8 ) & 1) ]; - r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; - r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; - r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_shuffle_pd(a, b, imm8) \ - simde_mm256_set_m128d( \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 2) & 3), \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 0) & 3)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm256_shuffle_pd(a, b, imm8) \ - SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ - (((imm8) >> 0) & 1) + 0, \ - (((imm8) >> 1) & 1) + 4, \ - (((imm8) >> 2) & 1) + 2, \ - (((imm8) >> 3) & 1) + 6) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_pd - #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sqrt_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); - r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sqrt_ps - #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sqrt_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sqrt_pd(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); - r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sqrt_pd - #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_ps(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_ps - #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_pd(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_pd - #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_si256(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_si256 - #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_ps(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_ps - #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_pd(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_pd - #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { - #if 
defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_si256 - #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128(hi_addr, lo_addr, a); - #else - simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); - simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128 - #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128d(hi_addr, lo_addr, a); - #else - simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); - simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128d - #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128i(hi_addr, lo_addr, a); - #else - simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); - simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128i - #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_ps - #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_pd - #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_si256(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_si256 - #define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_ps - #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_ps(a, b); - #else - return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_ps - #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_pd - #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_pd(a, b); - #else - return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_pd - #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_undefined_ps (void) { - simde__m256_private r_; - -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || 
HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_ps(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256_to_private(simde_mm256_setzero_ps()); -#endif - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_ps - #define _mm256_undefined_ps() simde_mm256_undefined_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_undefined_pd (void) { - simde__m256d_private r_; - -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_pd(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); -#endif - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_pd - #define _mm256_undefined_pd() simde_mm256_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_undefined_si256 (void) { - simde__m256i_private r_; -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_si256(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_si256 - #define _mm256_undefined_si256() simde_mm256_undefined_si256() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_xor_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_ps - #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_xor_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] ^ b_.u64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_pd - #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_xorsign_ps(simde__m256 dest, 
simde__m256 src) { - return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { - return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_negate_ps(simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_negate_pd(simde__m256d a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpackhi_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - r_.f32[4] = a_.f32[6]; - r_.f32[5] = b_.f32[6]; - r_.f32[6] = a_.f32[7]; - r_.f32[7] = b_.f32[7]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_ps - #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpackhi_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); - #else - r_.f64[0] = a_.f64[1]; - r_.f64[1] = b_.f64[1]; - r_.f64[2] = a_.f64[3]; - r_.f64[3] = b_.f64[3]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_pd - #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpacklo_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - r_.f32[4] = a_.f32[4]; - r_.f32[5] = b_.f32[4]; - r_.f32[6] = a_.f32[5]; - r_.f32[7] = b_.f32[5]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_ps - #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpacklo_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); - #else - r_.f64[0] = a_.f64[0]; - r_.f64[1] = b_.f64[0]; - r_.f64[2] = a_.f64[2]; - r_.f64[3] = b_.f64[2]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_pd - #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_zextps128_ps256 (simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); - #else - simde__m256_private r_; - - r_.m128_private[0] = simde__m128_to_private(a); - r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_zextps128_ps256 - #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_zextpd128_pd256 (simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); - #else - simde__m256d_private r_; - - r_.m128d_private[0] = simde__m128d_to_private(a); - r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_zextpd128_pd256 - #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_zextsi128_si256 (simde__m128i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); - #else - simde__m256i_private r_; - - r_.m128i_private[0] = simde__m128i_to_private(a); - r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_zextsi128_si256 - #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testc_ps(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); - m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); - m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - return wasm_i32x4_extract_lane(m, 0); - #else - uint_fast32_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= ~a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_ps - #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_pd (simde__m128d a, simde__m128d b) { - 
#if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testc_pd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); - return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); - #else - uint_fast64_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= ~a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_pd - #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testc_ps(a, b); - #else - uint_fast32_t r = 0; - simde__m256_private - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= ~a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testc_ps - #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testc_pd(a, b); - #else - uint_fast64_t r = 0; - simde__m256d_private - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= ~a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testc_pd - #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testc_si256(a, b); - #else - int_fast32_t r = 0; - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= ~a_.i32f[i] & b_.i32f[i]; - } - - return HEDLEY_STATIC_CAST(int, !r); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testc_si256 - #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testz_ps(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); - m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); - m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - return wasm_i32x4_extract_lane(m, 0); - #else - uint_fast32_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_ps - #define 
_mm_testz_ps(a, b) simde_mm_testz_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testz_pd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); - return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); - #else - uint_fast64_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_pd - #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testz_ps(a, b); - #else - uint_fast32_t r = 0; - simde__m256_private - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testz_ps - #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testz_pd(a, b); - #else - uint_fast64_t r = 0; - simde__m256d_private - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testz_pd - #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testz_si256(a, b); - #else - int_fast32_t r = 0; - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= a_.i32f[i] & b_.i32f[i]; - } - - r = !r; - #endif - - return HEDLEY_STATIC_CAST(int, r); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testz_si256 - #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testnzc_ps(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); - v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); - m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); - m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); - m = 
wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); - #else - uint32_t rz = 0, rc = 0; - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - rc |= ~a_.u32[i] & b_.u32[i]; - rz |= a_.u32[i] & b_.u32[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_ps - #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testnzc_pd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); - v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); - return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) - & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); - #else - uint64_t rc = 0, rz = 0; - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - rc |= ~a_.u64[i] & b_.u64[i]; - rz |= a_.u64[i] & b_.u64[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_pd - #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testnzc_ps(a, b); - #else - uint32_t rc = 0, rz = 0; - simde__m256_private - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - rc |= ~a_.u32[i] & b_.u32[i]; - rz |= a_.u32[i] & b_.u32[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testnzc_ps - #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testnzc_pd(a, b); - #else - uint64_t rc = 0, rz = 0; - simde__m256d_private - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - rc |= ~a_.u64[i] & b_.u64[i]; - rz |= a_.u64[i] & b_.u64[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testnzc_pd - #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testnzc_si256(a, b); - #else - int32_t rc = 0, rz = 0; - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - rc |= ~a_.i32f[i] & b_.i32f[i]; - rz |= a_.i32f[i] & b_.i32f[i]; - } - - return !!(rc & rz); - #endif 
-} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testnzc_si256 - #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX_H) */ -/* :: End simde/x86/avx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* The problem is that Microsoft doesn't support 64-byte aligned parameters, except for - * __m512/__m512i/__m512d. Since our private union has an __m512 member it will be 64-byte - * aligned even if we reduce the alignment requirements of other members. - * - * Even if we're on x86 and use the native AVX-512 types for arguments/return values, the - * to/from private functions will break, and I'm not willing to change their APIs to use - * pointers (which would also require more verbose code on the caller side) just to make - * MSVC happy. - * - * If you want to use AVX-512 in SIMDe, you'll need to either upgrade to MSVC 2017 or later, - * or upgrade to a different compiler (clang-cl, perhaps?). If you have an idea of how to - * fix this without requiring API changes (except transparently through macros), patches - * are welcome. - */ - -# if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0) -# if defined(SIMDE_X86_AVX512F_NATIVE) -# undef SIMDE_X86_AVX512F_NATIVE -# pragma message("Native AVX-512 support requires MSVC 2017 or later. See comment above (in code) for details.") -# endif -# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_32 -# else -# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_64 -# endif - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_AVX512BF16_NATIVE) - SIMDE_ALIGN_TO_16 __m128bh n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - 
SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128bh_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; - SIMDE_ALIGN_TO_32 simde__m128 m128[2]; - - #if defined(SIMDE_X86_BF16_NATIVE) - SIMDE_ALIGN_TO_32 __m256bh n; - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256bh_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - SIMDE_AVX512_ALIGN int8_t i8[64]; - SIMDE_AVX512_ALIGN int16_t i16[32]; - SIMDE_AVX512_ALIGN int32_t i32[16]; - SIMDE_AVX512_ALIGN int64_t i64[8]; - SIMDE_AVX512_ALIGN uint8_t u8[64]; - SIMDE_AVX512_ALIGN uint16_t u16[32]; - SIMDE_AVX512_ALIGN uint32_t u32[16]; - SIMDE_AVX512_ALIGN uint64_t u64[8]; - SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; - SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128[4]; - SIMDE_AVX512_ALIGN simde_uint128 u128[4]; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32[16]; - SIMDE_AVX512_ALIGN simde_float64 f64[8]; - #endif - - SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; - SIMDE_AVX512_ALIGN simde__m128 m128[4]; - SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; - SIMDE_AVX512_ALIGN simde__m256 m256[2]; - - #if defined(SIMDE_X86_AVX512BF16_NATIVE) - SIMDE_AVX512_ALIGN __m512bh n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; - #endif - #endif -} simde__m512bh_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - SIMDE_AVX512_ALIGN int8_t i8[64]; - SIMDE_AVX512_ALIGN int16_t i16[32]; - SIMDE_AVX512_ALIGN int32_t i32[16]; - SIMDE_AVX512_ALIGN int64_t i64[8]; - SIMDE_AVX512_ALIGN uint8_t u8[64]; - SIMDE_AVX512_ALIGN uint16_t u16[32]; - SIMDE_AVX512_ALIGN uint32_t u32[16]; - SIMDE_AVX512_ALIGN uint64_t u64[8]; - SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; - SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128[4]; - SIMDE_AVX512_ALIGN simde_uint128 u128[4]; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32[16]; - SIMDE_AVX512_ALIGN simde_float64 f64[8]; - #endif - - SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; - SIMDE_AVX512_ALIGN simde__m128 m128[4]; - SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; - SIMDE_AVX512_ALIGN simde__m256 m256[2]; - - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_AVX512_ALIGN __m512 n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; - #endif - #endif -} simde__m512_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) 
SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - SIMDE_AVX512_ALIGN int8_t i8[64]; - SIMDE_AVX512_ALIGN int16_t i16[32]; - SIMDE_AVX512_ALIGN int32_t i32[16]; - SIMDE_AVX512_ALIGN int64_t i64[8]; - SIMDE_AVX512_ALIGN uint8_t u8[64]; - SIMDE_AVX512_ALIGN uint16_t u16[32]; - SIMDE_AVX512_ALIGN uint32_t u32[16]; - SIMDE_AVX512_ALIGN uint64_t u64[8]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128[4]; - SIMDE_AVX512_ALIGN simde_uint128 u128[4]; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32[16]; - SIMDE_AVX512_ALIGN simde_float64 f64[8]; - SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; - SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_AVX512_ALIGN simde__m128d_private m128d_private[4]; - SIMDE_AVX512_ALIGN simde__m128d m128d[4]; - SIMDE_AVX512_ALIGN simde__m256d_private m256d_private[2]; - SIMDE_AVX512_ALIGN simde__m256d m256d[2]; - - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_AVX512_ALIGN __m512d n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; - #endif - #endif -} simde__m512d_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - SIMDE_AVX512_ALIGN simde_float16 f16[32]; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32 
[Vendored third-party code elided. This hunk continues the wholesale deletion of the
simde AVX-512 headers bundled under src/simde/x86/avx512/, none of which are referenced
once the SIMDE dependency is dropped from the package. The removed content in this span
comprises:

  - the remainder of types.h: the simde__m512 / simde__m512i / simde__m512d / simde__m512h
    private union layouts, the simde__mmask{8,16,32,64} typedefs and native-alias fallbacks,
    the SIMDE_MM_CMPINT_* constants, the *_from_private / *_to_private converters, and the
    size/alignment static asserts;
  - 2intersect.h in full: portable loop fallbacks for _mm{,256,512}_2intersect_epi32/epi64;
  - the openings of 4dpwssd.h and dpwssd.h;
  - cast.h in full: _mm512_cast* reinterpret and width-change helpers;
  - load.h and set.h in full: aligned-load and element-wise set helpers;
  - the start of mov.h: masked-move fallbacks (_mm{,256,512}_mask_mov_epi8/16/32/64, _ps, _pd).

Each header repeats its own MIT license block and "AUTOMATICALLY GENERATED FILE, DO NOT
MODIFY" markers. A short illustrative sketch of the masked-move pattern these fallbacks
implement follows; the deletion of mov.h then resumes below.]
a_.i16[i] : src_.i16[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi16 - #define _mm512_mask_mov_epi16(src, k, a) simde_mm512_mask_mov_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_epi32(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi32(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi32(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi32 - #define _mm512_mask_mov_epi32(src, k, a) simde_mm512_mask_mov_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_epi64(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi64(src_.m256i[0], k , a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi64(src_.m256i[1], k >> 4, a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi64 - #define _mm512_mask_mov_epi64(src, k, a) simde_mm512_mask_mov_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_mov_pd (simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_pd(src, k, a); - #else - return simde_mm512_castsi512_pd(simde_mm512_mask_mov_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_pd - #define _mm512_mask_mov_pd(src, k, a) simde_mm512_mask_mov_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_mov_ps (simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_ps(src, k, a); - #else - return simde_mm512_castsi512_ps(simde_mm512_mask_mov_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_ps - #define _mm512_mask_mov_ps(src, k, a) simde_mm512_mask_mov_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_x_mm512_mask_mov_ph (simde__m512h src, simde__mmask32 k, simde__m512h a) { - return simde_mm512_castsi512_ph(simde_mm512_mask_mov_epi16(simde_mm512_castph_si512(src), k, simde_mm512_castph_si512(a))); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi8 (simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi8(k, a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi8 - #define _mm_maskz_mov_epi8(k, a) simde_mm_maskz_mov_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi16 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi16(k, a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi16 - #define _mm_maskz_mov_epi16(k, a) simde_mm_maskz_mov_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi32 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi32(k, a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? 
a_.i32[i] : INT32_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi32 - #define _mm_maskz_mov_epi32(k, a) simde_mm_maskz_mov_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi64 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi64(k, a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - /* N.B. CM: No fallbacks as there are only two elements */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : INT64_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi64 - #define _mm_maskz_mov_epi64(k, a) simde_mm_maskz_mov_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_maskz_mov_pd (simde__mmask8 k, simde__m128d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_pd(k, a); - #else - return simde_mm_castsi128_pd(simde_mm_maskz_mov_epi64(k, simde_mm_castpd_si128(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_pd - #define _mm_maskz_mov_pd(k, a) simde_mm_maskz_mov_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskz_mov_ps (simde__mmask8 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_ps(k, a); - #else - return simde_mm_castsi128_ps(simde_mm_maskz_mov_epi32(k, simde_mm_castps_si128(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_ps - #define _mm_maskz_mov_ps(k, a) simde_mm_maskz_mov_ps(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi8 (simde__mmask32 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi8(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? 
a_.i8[i] : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi8 - #define _mm256_maskz_mov_epi8(k, a) simde_mm256_maskz_mov_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi16 (simde__mmask16 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi16(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi16 - #define _mm256_maskz_mov_epi16(k, a) simde_mm256_maskz_mov_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi32 (simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi32(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi32(k , a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi32(k >> 4, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi32 - #define _mm256_maskz_mov_epi32(k, a) simde_mm256_maskz_mov_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi64 (simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi64(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - /* N.B. CM: This fallback may not be faster as there are only four elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi64(k , a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi64(k >> 2, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi64 - #define _mm256_maskz_mov_epi64(k, a) simde_mm256_maskz_mov_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskz_mov_pd (simde__mmask8 k, simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_pd(k, a); - #else - return simde_mm256_castsi256_pd(simde_mm256_maskz_mov_epi64(k, simde_mm256_castpd_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_pd - #define _mm256_maskz_mov_pd(k, a) simde_mm256_maskz_mov_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskz_mov_ps (simde__mmask8 k, simde__m256 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_ps(k, a); - #else - return simde_mm256_castsi256_ps(simde_mm256_maskz_mov_epi32(k, simde_mm256_castps_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_ps - #define _mm256_maskz_mov_ps(k, a) simde_mm256_maskz_mov_ps(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi8 (simde__mmask64 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_mov_epi8(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); - #else - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi8 - #define _mm512_maskz_mov_epi8(k, a) simde_mm512_maskz_mov_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi16 (simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_mov_epi16(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : INT16_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi16 - #define _mm512_maskz_mov_epi16(k, a) simde_mm512_maskz_mov_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi32 (simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_epi32(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi32 - #define _mm512_maskz_mov_epi32(k, a) simde_mm512_maskz_mov_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi64 (simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_epi64(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi64(k , a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi64(k >> 4, a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi64 - #define _mm512_maskz_mov_epi64(k, a) simde_mm512_maskz_mov_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_mov_pd (simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_pd(k, a); - #else - return simde_mm512_castsi512_pd(simde_mm512_maskz_mov_epi64(k, simde_mm512_castpd_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_pd - #define _mm512_maskz_mov_pd(k, a) simde_mm512_maskz_mov_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_mov_ps (simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_ps(k, a); - #else - return simde_mm512_castsi512_ps(simde_mm512_maskz_mov_epi32(k, simde_mm512_castps_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_ps - #define _mm512_maskz_mov_ps(k, a) simde_mm512_maskz_mov_ps(k, a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MOV_H) */ -/* :: End simde/x86/avx512/mov.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_dpwssd_epi32(simde__m128i src, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) - return _mm_dpwssd_epi32(src, a, b); - #else - simde__m128i_private - src_ = simde__m128i_to_private(src), - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) - int32_t x1_ SIMDE_VECTOR(32); - int32_t x2_ SIMDE_VECTOR(32); - simde__m128i_private - r1_[2], - r2_[2]; - - a_.i16 = - SIMDE_SHUFFLE_VECTOR_( - 16, 16, - a_.i16, a_.i16, - 0, 2, 4, 6, - 1, 3, 5, 7 - ); - b_.i16 = - SIMDE_SHUFFLE_VECTOR_( - 16, 16, - b_.i16, b_.i16, - 0, 2, 4, 6, - 1, 3, 5, 7 - ); - - SIMDE_CONVERT_VECTOR_(x1_, a_.i16); - SIMDE_CONVERT_VECTOR_(x2_, b_.i16); - - simde_memcpy(&r1_, &x1_, sizeof(x1_)); - simde_memcpy(&r2_, &x2_, sizeof(x2_)); - - src_.i32 += - (r1_[0].i32 * r2_[0].i32) + - (r1_[1].i32 * r2_[1].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.i16[0])) ; i++) { - src_.i32[i / 2] += HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); - } - #endif - - return simde__m128i_from_private(src_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) - #undef _mm_dpwssd_epi32 - #define _mm_dpwssd_epi32(src, a, b) simde_mm_dpwssd_epi32(src, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_dpwssd_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) - return _mm_mask_dpwssd_epi32(src, k, a, b); - #else - return simde_mm_mask_mov_epi32(src, k, simde_mm_dpwssd_epi32(src, a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_dpwssd_epi32 - #define _mm_mask_dpwssd_epi32(src, k, a, b) simde_mm_mask_dpwssd_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_dpwssd_epi32(simde__mmask8 k, simde__m128i 
src, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) - return _mm_maskz_dpwssd_epi32(k, src, a, b); - #else - return simde_mm_maskz_mov_epi32(k, simde_mm_dpwssd_epi32(src, a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_dpwssd_epi32 - #define _mm_maskz_dpwssd_epi32(k, src, a, b) simde_mm_maskz_dpwssd_epi32(k, src, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_dpwssd_epi32(simde__m256i src, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) - return _mm256_dpwssd_epi32(src, a, b); - #else - simde__m256i_private - src_ = simde__m256i_to_private(src), - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) - int32_t x1_ SIMDE_VECTOR(64); - int32_t x2_ SIMDE_VECTOR(64); - simde__m256i_private - r1_[2], - r2_[2]; - - a_.i16 = - SIMDE_SHUFFLE_VECTOR_( - 16, 32, - a_.i16, a_.i16, - 0, 2, 4, 6, 8, 10, 12, 14, - 1, 3, 5, 7, 9, 11, 13, 15 - ); - b_.i16 = - SIMDE_SHUFFLE_VECTOR_( - 16, 32, - b_.i16, b_.i16, - 0, 2, 4, 6, 8, 10, 12, 14, - 1, 3, 5, 7, 9, 11, 13, 15 - ); - - SIMDE_CONVERT_VECTOR_(x1_, a_.i16); - SIMDE_CONVERT_VECTOR_(x2_, b_.i16); - - simde_memcpy(&r1_, &x1_, sizeof(x1_)); - simde_memcpy(&r2_, &x2_, sizeof(x2_)); - - src_.i32 += - (r1_[0].i32 * r2_[0].i32) + - (r1_[1].i32 * r2_[1].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.i16[0])) ; i++) { - src_.i32[i / 2] += HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); - } - #endif - - return simde__m256i_from_private(src_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) - #undef _mm256_dpwssd_epi32 - #define _mm256_dpwssd_epi32(src, a, b) simde_mm256_dpwssd_epi32(src, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_dpwssd_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) - return _mm256_mask_dpwssd_epi32(src, k, a, b); - #else - return simde_mm256_mask_mov_epi32(src, k, simde_mm256_dpwssd_epi32(src, a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_dpwssd_epi32 - #define _mm256_mask_dpwssd_epi32(src, k, a, b) simde_mm256_mask_dpwssd_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_dpwssd_epi32(simde__mmask8 k, simde__m256i src, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) - return _mm256_maskz_dpwssd_epi32(k, src, a, b); - #else - return simde_mm256_maskz_mov_epi32(k, simde_mm256_dpwssd_epi32(src, a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_dpwssd_epi32 - #define _mm256_maskz_dpwssd_epi32(k, src, a, b) simde_mm256_maskz_dpwssd_epi32(k, src, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_dpwssd_epi32(simde__m512i src, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VNNI_NATIVE) - return _mm512_dpwssd_epi32(src, a, b); - #else - simde__m512i_private - src_ = 
simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) - int32_t x1_ SIMDE_VECTOR(128); - int32_t x2_ SIMDE_VECTOR(128); - simde__m512i_private - r1_[2], - r2_[2]; - - a_.i16 = - SIMDE_SHUFFLE_VECTOR_( - 16, 64, - a_.i16, a_.i16, - 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, - 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 - ); - b_.i16 = - SIMDE_SHUFFLE_VECTOR_( - 16, 64, - b_.i16, b_.i16, - 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, - 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 - ); - - SIMDE_CONVERT_VECTOR_(x1_, a_.i16); - SIMDE_CONVERT_VECTOR_(x2_, b_.i16); - - simde_memcpy(&r1_, &x1_, sizeof(x1_)); - simde_memcpy(&r2_, &x2_, sizeof(x2_)); - - src_.i32 += - (r1_[0].i32 * r2_[0].i32) + - (r1_[1].i32 * r2_[1].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.i16[0])) ; i++) { - src_.i32[i / 2] += HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); - } - #endif - - return simde__m512i_from_private(src_); - #endif -} -#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) - #undef _mm512_dpwssd_epi32 - #define _mm512_dpwssd_epi32(src, a, b) simde_mm512_dpwssd_epi32(src, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_dpwssd_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VNNI_NATIVE) - return _mm512_mask_dpwssd_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_dpwssd_epi32(src, a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_dpwssd_epi32 - #define _mm512_mask_dpwssd_epi32(src, k, a, b) simde_mm512_mask_dpwssd_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_dpwssd_epi32(simde__mmask16 k, simde__m512i src, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VNNI_NATIVE) - return _mm512_maskz_dpwssd_epi32(k, src, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_dpwssd_epi32(src, a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_dpwssd_epi32 - #define _mm512_maskz_dpwssd_epi32(k, src, a, b) simde_mm512_maskz_dpwssd_epi32(k, src, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_DPWSSD_H) */ -/* :: End simde/x86/avx512/dpwssd.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/set1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_SET1_H) -#define SIMDE_X86_AVX512_SET1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_epi8(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_epi8 - #define _mm512_set1_epi8(a) simde_mm512_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_set1_epi8(simde__m512i src, simde__mmask64 k, int8_t a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_set1_epi8(src, k, a); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_set1_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_set1_epi8 - #define _mm512_mask_set1_epi8(src, k, a) simde_mm512_mask_set1_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_set1_epi8(simde__mmask64 k, int8_t a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_set1_epi8(k, a); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_set1_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_set1_epi8 - #define _mm512_maskz_set1_epi8(k, a) simde_mm512_maskz_set1_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set1_epi16 (int16_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_epi16(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_epi16 - #define _mm512_set1_epi16(a) simde_mm512_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_set1_epi16(simde__m512i src, simde__mmask32 k, int16_t a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_set1_epi16(src, k, a); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_set1_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_set1_epi16 - #define _mm512_mask_set1_epi16(src, k, a) simde_mm512_mask_set1_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_set1_epi16(simde__mmask32 k, int16_t a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_set1_epi16(k, a); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_set1_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_set1_epi16 - #define _mm512_maskz_set1_epi16(k, a) simde_mm512_maskz_set1_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m512i -simde_mm512_set1_epi32 (int32_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_epi32(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_epi32 - #define _mm512_set1_epi32(a) simde_mm512_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_set1_epi32(simde__m512i src, simde__mmask16 k, int32_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_set1_epi32(src, k, a); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_set1_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_set1_epi32 - #define _mm512_mask_set1_epi32(src, k, a) simde_mm512_mask_set1_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_set1_epi32(simde__mmask16 k, int32_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_set1_epi32(k, a); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_set1_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_set1_epi32 - #define _mm512_maskz_set1_epi32(k, a) simde_mm512_maskz_set1_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set1_epi64 (int64_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_epi64(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_epi64 - #define _mm512_set1_epi64(a) simde_mm512_set1_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_set1_epi64(simde__m512i src, simde__mmask8 k, int64_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_set1_epi64(src, k, a); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_set1_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_set1_epi64 - #define _mm512_mask_set1_epi64(src, k, a) simde_mm512_mask_set1_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_set1_epi64(simde__mmask8 k, int64_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_set1_epi64(k, a); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_set1_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_set1_epi64 - #define _mm512_maskz_set1_epi64(k, a) simde_mm512_maskz_set1_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set1_epu8 (uint8_t a) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a; - } - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set1_epu16 (uint16_t a) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a; - } - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set1_epu32 (uint32_t a) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a; 
- } - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set1_epu64 (uint64_t a) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a; - } - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_set1_ps (simde_float32 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_ps(a); - #else - simde__m512_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a; - } - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_ps - #define _mm512_set1_ps(a) simde_mm512_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_pd(a); - #else - simde__m512d_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a; - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_pd - #define _mm512_set1_pd(a) simde_mm512_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_set1_ph (simde_float16 a) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_set1_ph(a); - #else - simde__m512h_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.f16[i] = a; - } - - return simde__m512h_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_ph - #define _mm512_set1_ph(a) simde_mm512_set1_ph(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SET1_H) */ -/* :: End simde/x86/avx512/set1.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/add.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_ADD_H) -#define SIMDE_X86_AVX512_ADD_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2018-2020 Evan Nemerson - * 2019-2020 Michael R. Crusoe - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX2_H) -#define SIMDE_X86_AVX2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_abs_epi8 (simde__m256i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_abs_epi8(a); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_abs_epi8(a_.m128i[0]); - r_.m128i[1] = simde_mm_abs_epi8(a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_abs_epi8 - #define _mm256_abs_epi8(a) simde_mm256_abs_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_abs_epi16 (simde__m256i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_abs_epi16(a); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_abs_epi16(a_.m128i[0]); - r_.m128i[1] = simde_mm_abs_epi16(a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? 
-a_.i16[i] : a_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_abs_epi16 - #define _mm256_abs_epi16(a) simde_mm256_abs_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_abs_epi32(simde__m256i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_abs_epi32(a); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_abs_epi32(a_.m128i[0]); - r_.m128i[1] = simde_mm_abs_epi32(a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_abs_epi32 - #define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_add_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_add_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_add_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_add_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_epi8 - #define _mm256_add_epi8(a, b) simde_mm256_add_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_add_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_add_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_add_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_add_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_epi16 - #define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hadd_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hadd_epi16(a, b); - #else - return simde_mm256_add_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_epi16 - #define _mm256_hadd_epi16(a, b) simde_mm256_hadd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_add_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_add_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_add_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = 
simde_mm_add_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_epi32 - #define _mm256_add_epi32(a, b) simde_mm256_add_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hadd_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hadd_epi32(a, b); - #else - return simde_mm256_add_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_epi32 - #define _mm256_hadd_epi32(a, b) simde_mm256_hadd_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_add_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_add_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_add_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_add_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) - r_.i64 = a_.i64 + b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] + b_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_epi64 - #define _mm256_add_epi64(a, b) simde_mm256_add_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_alignr_epi8 (simde__m256i a, simde__m256i b, int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - if (HEDLEY_UNLIKELY(count > 31)) - return simde_mm256_setzero_si256(); - - for (size_t h = 0 ; h < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; h++) { - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 31) { - r_.m128i_private[h].i8[i] = 0; - } else if (srcpos > 15) { - r_.m128i_private[h].i8[i] = a_.m128i_private[h].i8[(srcpos) & 15]; - } else { - r_.m128i_private[h].i8[i] = b_.m128i_private[h].i8[srcpos]; - } - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_BUG_PGI_30106) -# define simde_mm256_alignr_epi8(a, b, count) _mm256_alignr_epi8(a, b, count) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_alignr_epi8(a, b, count) \ - simde_mm256_set_m128i( \ - simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (count)), \ - simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (count))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_alignr_epi8 - #define _mm256_alignr_epi8(a, b, count) simde_mm256_alignr_epi8(a, b, (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_and_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_and_si256(a, b); - #else - 
simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_and_si128(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_and_si128(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] & b_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_si256 - #define _mm256_and_si256(a, b) simde_mm256_and_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_andnot_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_andnot_si256(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_andnot_si128(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_andnot_si128(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_si256 - #define _mm256_andnot_si256(a, b) simde_mm256_andnot_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_adds_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_adds_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_adds_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_adds_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_adds_epi8 - #define _mm256_adds_epi8(a, b) simde_mm256_adds_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_adds_epi16(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_adds_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_adds_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_adds_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_adds_epi16 - #define _mm256_adds_epi16(a, b) simde_mm256_adds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hadds_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hadds_epi16(a, b); - #else - return simde_mm256_adds_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadds_epi16 - #define 
_mm256_hadds_epi16(a, b) simde_mm256_hadds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_adds_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_adds_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_adds_epu8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_adds_epu8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_adds_epu8 - #define _mm256_adds_epu8(a, b) simde_mm256_adds_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_adds_epu16(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_adds_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_adds_epu16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_adds_epu16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_adds_epu16 - #define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_avg_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_avg_epu8 - #define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_avg_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_avg_epu16 - #define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blend_epi32(simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((imm8 >> i) & 1) ? 
b_.i32[i] : a_.i32[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm_blend_epi32(a, b, imm8) _mm_blend_epi32(a, b, imm8) -#elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) -# define simde_mm_blend_epi32(a, b, imm8) \ - simde_mm_castps_si128(simde_mm_blend_ps(simde_mm_castsi128_ps(a), simde_mm_castsi128_ps(b), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_epi32 - #define _mm_blend_epi32(a, b, imm8) simde_mm_blend_epi32(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_blend_epi16(simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((imm8 >> i%8) & 1) ? b_.i16[i] : a_.i16[i]; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_BUG_CLANG_REV_234560) -# define simde_mm256_blend_epi16(a, b, imm8) _mm256_castpd_si256(_mm256_blend_epi16(a, b, imm8)) -#elif defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_blend_epi16(a, b, imm8) _mm256_blend_epi16(a, b, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_epi16(a, b, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8)), \ - simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_epi16 - #define _mm256_blend_epi16(a, b, imm8) simde_mm256_blend_epi16(a, b, imm8) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_blend_epi32(simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((imm8 >> i) & 1) ? 
b_.i32[i] : a_.i32[i]; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_blend_epi32(a, b, imm8) _mm256_blend_epi32(a, b, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_epi32(a, b, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8) >> 4), \ - simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8) & 0x0F)) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_epi32 - #define _mm256_blend_epi32(a, b, imm8) simde_mm256_blend_epi32(a, b, imm8) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_blendv_epi8(simde__m256i a, simde__m256i b, simde__m256i mask) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_blendv_epi8(a, b, mask); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - mask_ = simde__m256i_to_private(mask); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_blendv_epi8(a_.m128i[0], b_.m128i[0], mask_.m128i[0]); - r_.m128i[1] = simde_mm_blendv_epi8(a_.m128i[1], b_.m128i[1], mask_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - __typeof__(mask_.i8) tmp = mask_.i8 >> 7; - r_.i8 = (tmp & b_.i8) | (~tmp & a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - int8_t tmp = mask_.i8[i] >> 7; - r_.i8[i] = (tmp & b_.i8[i]) | (~tmp & a_.i8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_blendv_epi8(a, b, imm8) _mm256_blendv_epi8(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_blendv_epi8 - #define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastb_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastb_epi8(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastb_epi8 - #define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastb_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastb_epi8(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastb_epi8 - #define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastw_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastw_epi16(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - 
#undef _mm_broadcastw_epi16 - #define _mm_broadcastw_epi16(a) simde_mm_broadcastw_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastw_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastw_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastw_epi16 - #define _mm256_broadcastw_epi16(a) simde_mm256_broadcastw_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastd_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastd_epi32(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastd_epi32 - #define _mm_broadcastd_epi32(a) simde_mm_broadcastd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastd_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastd_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastd_epi32 - #define _mm256_broadcastd_epi32(a) simde_mm256_broadcastd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastq_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastq_epi64(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastq_epi64 - #define _mm_broadcastq_epi64(a) simde_mm_broadcastq_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastq_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastq_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastq_epi64 - #define _mm256_broadcastq_epi64(a) simde_mm256_broadcastq_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_broadcastss_ps (simde__m128 a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastss_ps(a); - #elif defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_shuffle_ps(a, a, 0); - #else - simde__m128_private r_; - simde__m128_private a_= simde__m128_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { 
- r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastss_ps - #define _mm_broadcastss_ps(a) simde_mm_broadcastss_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcastss_ps (simde__m128 a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastss_ps(a); - #else - simde__m256_private r_; - simde__m128_private a_= simde__m128_to_private(a); - - #if defined(SIMDE_X86_AVX_NATIVE) - __m128 tmp = _mm_permute_ps(a_.n, 0); - r_.n = _mm256_insertf128_ps(_mm256_castps128_ps256(tmp), tmp, 1); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 0, 0, 0, 0, 0, 0, 0); - #elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) - r_.m128[0] = r_.m128[1] = simde_mm_broadcastss_ps(simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastss_ps - #define _mm256_broadcastss_ps(a) simde_mm256_broadcastss_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_broadcastsd_pd (simde__m128d a) { - return simde_mm_movedup_pd(a); -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastsd_pd - #define _mm_broadcastsd_pd(a) simde_mm_broadcastsd_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcastsd_pd (simde__m128d a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastsd_pd(a); - #else - simde__m256d_private r_; - simde__m128d_private a_= simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[0]; - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastsd_pd - #define _mm256_broadcastsd_pd(a) simde_mm256_broadcastsd_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastsi128_si256 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) - return _mm256_broadcastsi128_si256(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i_private[0] = a_; - r_.m128i_private[1] = a_; - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = a_.i64[1]; - r_.i64[2] = a_.i64[0]; - r_.i64[3] = a_.i64[1]; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#define simde_mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastsi128_si256 - #define _mm256_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) - #undef _mm_broadcastsi128_si256 - #define _mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_bslli_epi128 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); - - SIMDE_VECTORIZE - for (int i = 0 ; i < ssize ; i++) { - const int e = i - imm8; - if(i >= (ssize/2)) { - if(e >= (ssize/2) && e < ssize) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - else{ - if(e 
>= 0 && e < (ssize/2)) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_bslli_epi128(a, imm8) _mm256_bslli_epi128(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_bslli_epi128 - #define _mm256_bslli_epi128(a, imm8) simde_mm256_bslli_epi128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_bsrli_epi128 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); - - SIMDE_VECTORIZE - for (int i = 0 ; i < ssize ; i++) { - const int e = i + imm8; - if(i < (ssize/2)) { - if(e >= 0 && e < (ssize/2)) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - else{ - if(e >= (ssize/2) && e < ssize) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_bsrli_epi128(a, imm8) _mm256_bsrli_epi128(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_bsrli_epi128 - #define _mm256_bsrli_epi128(a, imm8) simde_mm256_bsrli_epi128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi8 - #define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi16 - #define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi32 - #define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi64 - #define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi8 - #define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 > b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi16 - #define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi32 - #define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi64 - #define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi16 - #define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi32 - #define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi64 - #define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi16_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi16_epi32 - #define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi16_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi16_epi64 - #define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi32_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi32_epi64 - #define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi16 - #define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi32 - #define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u8[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi64 - #define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu16_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu16_epi32 - #define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu16_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if 
defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu16_epi64 - #define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu32_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu32_epi64 - #define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_extract_epi8 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 31){ - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i8[index]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi8(a, index) _mm256_extract_epi8(a, index) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi8 - #define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_extract_epi16 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 15) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i16[index]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi16(a, index) _mm256_extract_epi16(a, index) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi16 - #define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_extracti128_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[imm8]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extracti128_si256 - #define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i32gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return 
simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i32gather_epi32(base_addr, vindex, scale) _mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_epi32 - #define _mm_i32gather_epi32(base_addr, vindex, scale) simde_mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i32gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { - r_.i32[i] = src_.i32[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_epi32 - #define _mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_i32gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i32gather_epi32(base_addr, vindex, scale) _mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_epi32 - #define _mm256_i32gather_epi32(base_addr, vindex, scale) simde_mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_i32gather_epi32(simde__m256i src, const int32_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 
&& !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - src_ = simde__m256i_to_private(src), - mask_ = simde__m256i_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { - r_.i32[i] = src_.i32[i]; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i32gather_epi32 - #define _mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i64gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i64gather_epi32(base_addr, vindex, scale) _mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i64gather_epi32 - #define _mm_i64gather_epi32(base_addr, vindex, scale) simde_mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { 
- r_.i32[i] = src_.i32[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i64gather_epi32 - #define _mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_i64gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i64gather_epi32(base_addr, vindex, scale) _mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i64gather_epi32 - #define _mm256_i64gather_epi32(base_addr, vindex, scale) simde_mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m256i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128i_private - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { - r_.i32[i] = src_.i32[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i64gather_epi32 - #define _mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), 
vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_epi64 - #define _mm_i32gather_epi64(base_addr, vindex, scale) simde_mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i32gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_epi64 - #define _mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - 
simde__m256i_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_epi64 - #define _mm256_i32gather_epi64(base_addr, vindex, scale) simde_mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_i32gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m128i vindex, simde__m256i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - src_ = simde__m256i_to_private(src), - mask_ = simde__m256i_to_private(mask), - r_; - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i32gather_epi64 - #define _mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i64gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , 
vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i64gather_epi64 - #define _mm_i64gather_epi64(base_addr, vindex, scale) simde_mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i64gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i64gather_epi64 - #define _mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_i64gather_epi64(const int64_t* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define 
[... continued wholesale deletion of the vendored SIMDe AVX2 emulation header. This portion removes the portable per-lane fallbacks, and their _mm256_* native-alias macros, for the remaining gather variants (i64gather_epi64 and the i32/i64-indexed ps/pd gathers, plain and masked), maskload/maskstore (epi32/epi64), inserti128_si256, madd_epi16/maddubs_epi16, max/min (epi8/epi16/epi32 and epu8/epu16/epu32), movemask_epi8, mpsadbw_epu8, mul_epi32/epu32, mulhi/mulhrs/mullo, or_si256, packs/packus (epi16/epi32), permute2x128/permute4x64/permutevar8x32, sad_epu8, shuffle_epi8/epi32, shufflehi/shufflelo_epi16, and sign_epi8/epi16/epi32. Every emulation follows the same shape: return the native intrinsic when SIMDE_X86_AVX2_NATIVE is defined, otherwise loop over the lanes of the private union type, selecting lanes by the mask's sign bit and going through memcpy for memory access (see the sketch after this hunk). The file is deleted in its entirety as part of dropping the bundled SIMDe dependency. ...]
-a_.i32[i] : a_.i32[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sign_epi32 - #define _mm256_sign_epi32(a, b) simde_mm256_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sll_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sll_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sll_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sll_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 15) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift)); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sll_epi16 - #define _mm256_sll_epi16(a, count) simde_mm256_sll_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sll_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sll_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sll_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sll_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 31) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift)); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sll_epi32 - #define _mm256_sll_epi32(a, count) simde_mm256_sll_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sll_epi64 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sll_epi64(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sll_epi64(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sll_epi64(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 63) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift)); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sll_epi64 - #define _mm256_sll_epi64(a, count) simde_mm256_sll_epi64(a, count) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_epi16 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* Note: There is no consistency in how compilers handle values outside of - the expected range, hence the discrepancy between what we allow and what - Intel specifies. Some compilers will return 0, others seem to just mask - off everything outside of the range. */ - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_i16) / sizeof(a_.altivec_i16[0])) ; i++) { - r_.altivec_i16[i] = vec_sl(a_.altivec_i16[i], sv); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)); - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_slli_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_epi16 - #define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_i32) / sizeof(a_.altivec_i32[0])) ; i++) { - r_.altivec_i32[i] = vec_sl(a_.altivec_i32[i], sv); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_epi32(a, imm8) _mm256_slli_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_slli_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_epi32 - #define _mm256_slli_epi32(a, imm8) simde_mm256_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_epi64 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8) -#elif 
SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_slli_epi64(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_epi64 - #define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { - const int e = HEDLEY_STATIC_CAST(int, i) - imm8; - r_.m128i_private[h].i8[i] = (e >= 0) ? a_.m128i_private[h].i8[e] : 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_si256(a, imm8) _mm256_slli_si256(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) -# define simde_mm256_slli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm256_slli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_si256 - #define _mm256_slli_si256(a, imm8) simde_mm256_slli_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sllv_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vreinterpretq_s32_u32(b_.neon_u32)); - r_.neon_u32 = vandq_u32(r_.neon_u32, vcltq_u32(b_.neon_u32, vdupq_n_u32(32))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < UINT32_C(32))) & (a_.u32 << b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] << b_.u32[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_sllv_epi32(a, b) _mm_sllv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_sllv_epi32 - #define _mm_sllv_epi32(a, b) simde_mm_sllv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sllv_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sllv_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sllv_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 << b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] << b_.u32[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_sllv_epi32(a, b) _mm256_sllv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sllv_epi32 - #define _mm256_sllv_epi32(a, b) simde_mm256_sllv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sllv_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vreinterpretq_s64_u64(b_.neon_u64)); - r_.neon_u64 = vandq_u64(r_.neon_u64, vcltq_u64(b_.neon_u64, vdupq_n_u64(64))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] << b_.u64[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_sllv_epi64(a, b) _mm_sllv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_sllv_epi64 - #define _mm_sllv_epi64(a, b) simde_mm_sllv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sllv_epi64 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sllv_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sllv_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] << b_.u64[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_sllv_epi64(a, b) _mm256_sllv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sllv_epi64 - #define _mm256_sllv_epi64(a, b) simde_mm256_sllv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sra_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sra_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sra_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sra_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sra_epi16 - #define _mm256_sra_epi16(a, count) simde_mm256_sra_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sra_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sra_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sra_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sra_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - - if (shift > 31) shift = 31; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sra_epi32 - #define _mm256_sra_epi32(a, count) simde_mm256_sra_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srai_epi16 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srai_epi16(a, imm8) _mm256_srai_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srai_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srai_epi16 - #define _mm256_srai_epi16(a, imm8) simde_mm256_srai_epi16(a, imm8) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srai_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); - - if (shift > 31) shift = 31; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srai_epi32(a, imm8) _mm256_srai_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srai_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srai_epi32 - #define _mm256_srai_epi32(a, imm8) simde_mm256_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srav_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_srav_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t cnt = vreinterpretq_s32_u32(vminq_u32(count_.neon_u32, vdupq_n_u32(31))); - r_.neon_i32 = vshlq_s32(a_.neon_i32, vnegq_s32(cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); - r_.i32[i] = a_.i32[i] >> HEDLEY_STATIC_CAST(int, shift > 31 ? 31 : shift); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_srav_epi32 - #define _mm_srav_epi32(a, count) simde_mm_srav_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srav_epi32 (simde__m256i a, simde__m256i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srav_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - count_ = simde__m256i_to_private(count); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srav_epi32(a_.m128i[0], count_.m128i[0]); - r_.m128i[1] = simde_mm_srav_epi32(a_.m128i[1], count_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); - if (shift > 31) shift = 31; - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srav_epi32 - #define _mm256_srav_epi32(a, count) simde_mm256_srav_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 16 ? 
16 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi16 - #define _mm256_srl_epi16(a, count) simde_mm256_srl_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 32 ? 32 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi32 - #define _mm256_srl_epi32(a, count) simde_mm256_srl_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi64 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi64(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi64(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi64(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 64 ? 
64 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(64, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi64 - #define _mm256_srl_epi64(a, count) simde_mm256_srl_epi64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_epi16 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - if (imm8 > 15) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_u16) / sizeof(a_.altivec_u16[0])) ; i++) { - r_.altivec_u16[i] = vec_sr(a_.altivec_u16[i], sv); - } - #else - if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) { - simde_memset(&r_, 0, sizeof(r_)); - } else { - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> imm8; - } - #endif - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_epi16(a, imm8) _mm256_srli_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srli_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_epi16 - #define _mm256_srli_epi16(a, imm8) simde_mm256_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_u32) / sizeof(a_.altivec_u32[0])) ; i++) { - r_.altivec_u32[i] = vec_sr(a_.altivec_u32[i], sv); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> imm8; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srli_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_epi32 - #define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_epi64 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - 
r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, imm8); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srli_epi64(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_epi64 - #define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { - const int e = imm8 + HEDLEY_STATIC_CAST(int, i); - r_.m128i_private[h].i8[i] = (e < 16) ? a_.m128i_private[h].i8[e] : 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) -# define simde_mm256_srli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm256_srli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_si256 - #define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srlv_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_srlv_epi32(a, b) _mm_srlv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_srlv_epi32 - #define _mm_srlv_epi32(a, b) simde_mm_srlv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srlv_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] >> b_.u32[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_srlv_epi32(a, b) _mm256_srlv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srlv_epi32 - #define _mm256_srlv_epi32(a, b) simde_mm256_srlv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srlv_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_srlv_epi64(a, b) _mm_srlv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_srlv_epi64 - #define _mm_srlv_epi64(a, b) simde_mm_srlv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srlv_epi64 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_srlv_epi64(a, b) _mm256_srlv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srlv_epi64 - #define _mm256_srlv_epi64(a, b) simde_mm256_srlv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_stream_load_si256 (const simde__m256i* mem_addr) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_stream_load_si256(HEDLEY_CONST_CAST(simde__m256i*, mem_addr)); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - return __builtin_nontemporal_load(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_stream_load_si256(mem_addr) simde_mm256_stream_load_si256(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi8 - #define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) { - #if 
defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi16 - #define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hsub_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hsub_epi16(a, b); - #else - return simde_mm256_sub_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_epi16 - #define _mm256_hsub_epi16(a, b) simde_mm256_hsub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi32 - #define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hsub_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hsub_epi32(a, b); - #else - return simde_mm256_sub_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_epi32 - #define _mm256_hsub_epi32(a, b) simde_mm256_hsub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi64 - #define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_sub_epu32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ 
= simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_sub_epu32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_sub_epu32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_subs_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epi8 - #define _mm256_subs_epi8(a, b) simde_mm256_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epi16(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_subs_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epi16 - #define _mm256_subs_epi16(a, b) simde_mm256_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hsubs_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hsubs_epi16(a, b); - #else - return simde_mm256_subs_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsubs_epi16 - #define _mm256_hsubs_epi16(a, b) simde_mm256_hsubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_subs_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epu8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epu8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epu8 - #define _mm256_subs_epu8(a, b) simde_mm256_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epu16(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - 
return _mm256_subs_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epu16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epu16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epu16 - #define _mm256_subs_epu16(a, b) simde_mm256_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_x_mm256_test_all_ones (simde__m256i a) { - simde__m256i_private a_ = simde__m256i_to_private(a); - int r; - int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(&:r_) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r_ &= a_.i32f[i]; - } - - r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); - - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, - 0, 32, 1, 33, 2, 34, 3, 35, - 4, 36, 5, 37, 6, 38, 7, 39, - 16, 48, 17, 49, 18, 50, 19, 51, - 20, 52, 21, 53, 22, 54, 23, 55); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { - r_.i8[2 * i] = a_.i8[i + ~(~i | 7)]; - r_.i8[2 * i + 1] = b_.i8[i + ~(~i | 7)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi8 - #define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, - 0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { - r_.i16[2 * i] = a_.i16[i + ~(~i | 3)]; - r_.i16[2 * i + 1] = b_.i16[i + ~(~i | 3)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi16 - #define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if 
SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, - 0, 8, 1, 9, 4, 12, 5, 13); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { - r_.i32[2 * i] = a_.i32[i + ~(~i | 1)]; - r_.i32[2 * i + 1] = b_.i32[i + ~(~i | 1)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi32 - #define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 0, 4, 2, 6); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { - r_.i64[2 * i] = a_.i64[2 * i]; - r_.i64[2 * i + 1] = b_.i64[2 * i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi64 - #define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, - 8, 40, 9, 41, 10, 42, 11, 43, - 12, 44, 13, 45, 14, 46, 15, 47, - 24, 56, 25, 57, 26, 58, 27, 59, - 28, 60, 29, 61, 30, 62, 31, 63); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { - r_.i8[2 * i] = a_.i8[i + 8 + ~(~i | 7)]; - r_.i8[2 * i + 1] = b_.i8[i + 8 + ~(~i | 7)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_epi8 - #define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, - 4, 20, 5, 21, 6, 22, 7, 23, - 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { - r_.i16[2 * i] = a_.i16[i 
-/* ... remainder of vendored simde/x86/avx2.h deleted: simde_mm256_unpackhi_epi16/epi32/epi64, simde_mm256_xor_si256, and their native-alias #defines ... */
-/* :: End simde/x86/avx2.h :: */
-/* ... vendored simde/x86/avx512/add.h deleted: plain, mask, and maskz add emulations (epi8/16/32/64, ss, ps, pd) for __m128i/__m128, __m256i, and __m512i/__m512/__m512d ... */
-/* ... vendored simde/x86/avx512/4dpwssd.h deleted: _mm512_4dpwssd_epi32 and its mask/maskz variants ... */
-/* ... vendored simde/x86/avx512/4dpwssds.h and its nested dpwssds.h and adds.h includes deleted: saturating dot-product _mm_/_mm256_/_mm512_dpwssds_epi32, saturating _mm512_adds_epi8/epi16 and adds_epu8/epu16 with mask/maskz variants, the simde_x_mm_/x_mm256_/x_mm512_adds_epi32 helpers, _mm512_4dpwssds_epi32 with mask/maskz variants, and the accompanying MIT license headers and "AUTOMATICALLY GENERATED" markers ... */
-/* :: Begin simde/x86/avx512/abs.h :: */
-/* ... vendored simde/x86/avx512/abs.h: MIT license header plus mask/maskz abs_epi8/16/32 and abs_epi64 emulations for __m128i, __m256i, and __m512i (deletion continues) ... */
-a_.i64[i] : a_.i64[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_epi64 - #define _mm512_abs_epi64(a) simde_mm512_abs_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_abs_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_abs_epi64(src, k, a); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_abs_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_abs_epi64 - #define _mm512_mask_abs_epi64(src, k, a) simde_mm512_mask_abs_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_abs_epi64(simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_abs_epi64(k, a); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_abs_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_abs_epi64 - #define _mm512_maskz_abs_epi64(k, a) simde_mm512_maskz_abs_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_abs_ps(simde__m512 v2) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - return _mm512_abs_ps(v2); - #else - simde__m512_private - r_, - v2_ = simde__m512_to_private(v2); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].neon_f32 = vabsq_f32(v2_.m128_private[i].neon_f32); - } - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].altivec_f32 = vec_abs(v2_.m128_private[i].altivec_f32); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = (v2_.f32[i] < INT64_C(0)) ? 
-v2_.f32[i] : v2_.f32[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_ps - #define _mm512_abs_ps(v2) simde_mm512_abs_ps(v2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_abs_ps(simde__m512 src, simde__mmask16 k, simde__m512 v2) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - return _mm512_mask_abs_ps(src, k, v2); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_abs_ps(v2)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_abs_ps - #define _mm512_mask_abs_ps(src, k, v2) simde_mm512_mask_abs_ps(src, k, v2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_abs_pd(simde__m512d v2) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_87467) - return _mm512_abs_pd(v2); - #elif defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - /* gcc bug: https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01962.html */ - return _mm512_abs_pd(_mm512_castpd_ps(v2)); - #else - simde__m512d_private - r_, - v2_ = simde__m512d_to_private(v2); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].neon_f64 = vabsq_f64(v2_.m128d_private[i].neon_f64); - } - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].altivec_f64 = vec_abs(v2_.m128d_private[i].altivec_f64); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = (v2_.f64[i] < INT64_C(0)) ? 
-v2_.f64[i] : v2_.f64[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_pd - #define _mm512_abs_pd(v2) simde_mm512_abs_pd(v2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_abs_pd(simde__m512d src, simde__mmask8 k, simde__m512d v2) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_87467) - return _mm512_mask_abs_pd(src, k, v2); - #elif defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - /* gcc bug: https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01962.html */ - return _mm512_mask_abs_pd(src, k, _mm512_castpd_ps(v2)); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_abs_pd(v2)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_abs_pd - #define _mm512_mask_abs_pd(src, k, v2) simde_mm512_mask_abs_pd(src, k, v2) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_ABS_H) */ -/* :: End simde/x86/avx512/abs.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/and.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_AND_H) -#define SIMDE_X86_AVX512_AND_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_and_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_and_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_X86_AVX_NATIVE) - r_.m256d[0] = simde_mm256_and_pd(a_.m256d[0], b_.m256d[0]); - r_.m256d[1] = simde_mm256_and_pd(a_.m256d[1], b_.m256d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_pd - #define _mm512_and_pd(a, b) simde_mm512_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_and_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_and_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_X86_AVX_NATIVE) - r_.m256[0] = simde_mm256_and_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_and_ps(a_.m256[1], b_.m256[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_ps - #define _mm512_and_ps(a, b) simde_mm512_and_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_and_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_and_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_and_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_and_ps - #define _mm512_mask_and_ps(src, k, a, b) simde_mm512_mask_and_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_and_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_and_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_and_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_ps - #define _mm512_maskz_and_ps(k, a, b) simde_mm512_maskz_and_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_and_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_and_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_and_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - 
#undef _mm512_mask_and_pd - #define _mm512_mask_and_pd(src, k, a, b) simde_mm512_mask_and_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_and_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_and_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_and_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_pd - #define _mm512_maskz_and_pd(k, a, b) simde_mm512_maskz_and_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_and_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_and_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_epi32 - #define _mm512_and_epi32(a, b) simde_mm512_and_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_and_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_and_epi32(src, k, v2, v3); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_and_epi32(v2, v3)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_and_epi32 - #define _mm512_mask_and_epi32(src, k, v2, v3) simde_mm512_mask_and_epi32(src, k, v2, v3) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_and_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_and_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_and_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_epi32 - #define _mm512_maskz_and_epi32(k, a, b) simde_mm512_maskz_and_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_and_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_and_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] & b_.i64[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_epi64 - #define _mm512_and_epi64(a, b) simde_mm512_and_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_and_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_and_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_and_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_and_epi64 - #define _mm512_mask_and_epi64(src, k, a, b) simde_mm512_mask_and_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_and_epi64(simde__mmask8 k, simde__m512i 
a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_and_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_and_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_epi64 - #define _mm512_maskz_and_epi64(k, a, b) simde_mm512_maskz_and_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_and_si512 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_and_si512(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_and_si256(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_and_si256(a_.m256i[1], b_.m256i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_si512 - #define _mm512_and_si512(a, b) simde_mm512_and_si512(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_AND_H) */ -/* :: End simde/x86/avx512/and.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/andnot.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_ANDNOT_H) -#define SIMDE_X86_AVX512_ANDNOT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_andnot_ps(a, b) _mm512_andnot_ps(a, b) -#else - #define simde_mm512_andnot_ps(a, b) simde_mm512_castsi512_ps(simde_mm512_andnot_si512(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_andnot_ps - #define _mm512_andnot_ps(a, b) simde_mm512_andnot_ps(a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_mask_andnot_ps(src, k, a, b) _mm512_mask_andnot_ps((src), (k), (a), (b)) -#else - #define simde_mm512_mask_andnot_ps(src, k, a, b) simde_mm512_castsi512_ps(simde_mm512_mask_andnot_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_andnot_ps - #define _mm512_mask_andnot_ps(src, k, a, b) simde_mm512_mask_andnot_ps(src, k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_maskz_andnot_ps(k, a, b) _mm512_maskz_andnot_ps((k), (a), (b)) -#else - #define simde_mm512_maskz_andnot_ps(k, a, b) simde_mm512_castsi512_ps(simde_mm512_maskz_andnot_epi32(k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_andnot_ps - #define _mm512_maskz_andnot_ps(k, a, b) simde_mm512_maskz_andnot_ps(k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_andnot_pd(a, b) _mm512_andnot_pd(a, b) -#else - #define simde_mm512_andnot_pd(a, b) simde_mm512_castsi512_pd(simde_mm512_andnot_si512(simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_andnot_pd - #define _mm512_andnot_pd(a, b) simde_mm512_andnot_pd(a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_mask_andnot_pd(src, k, a, b) _mm512_mask_andnot_pd((src), (k), (a), (b)) -#else - #define simde_mm512_mask_andnot_pd(src, k, a, b) simde_mm512_castsi512_pd(simde_mm512_mask_andnot_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_andnot_pd - #define _mm512_mask_andnot_pd(src, k, a, b) simde_mm512_mask_andnot_pd(src, k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_maskz_andnot_pd(k, a, b) _mm512_maskz_andnot_pd((k), (a), (b)) -#else - #define simde_mm512_maskz_andnot_pd(k, a, b) simde_mm512_castsi512_pd(simde_mm512_maskz_andnot_epi64(k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_andnot_pd - #define _mm512_maskz_andnot_pd(k, a, b) simde_mm512_maskz_andnot_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_andnot_si512 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return 
_mm512_andnot_si512(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_andnot_si256(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_andnot_si256(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#define simde_mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b) -#define simde_mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b) -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_andnot_si512 - #define _mm512_andnot_si512(a, b) simde_mm512_andnot_si512(a, b) - #undef _mm512_andnot_epi32 - #define _mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b) - #undef _mm512_andnot_epi64 - #define _mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_andnot_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_andnot_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_andnot_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_andnot_epi32 - #define _mm512_mask_andnot_epi32(src, k, a, b) simde_mm512_mask_andnot_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_andnot_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_andnot_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_andnot_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_andnot_epi32 - #define _mm512_maskz_andnot_epi32(k, a, b) simde_mm512_maskz_andnot_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_andnot_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_andnot_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_andnot_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_andnot_epi64 - #define _mm512_mask_andnot_epi64(src, k, a, b) simde_mm512_mask_andnot_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_andnot_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_andnot_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_andnot_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_andnot_epi64 - #define _mm512_maskz_andnot_epi64(k, a, b) simde_mm512_maskz_andnot_epi64(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_ANDNOT_H) */ -/* :: End simde/x86/avx512/andnot.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/avg.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including 
without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_AVG_H) -#define SIMDE_X86_AVX512_AVG_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_avg_epu8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_avg_epu8(src, k, a, b); - #else - return simde_mm_mask_mov_epi8(src, k, simde_mm_avg_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_avg_epu8 - #define _mm_mask_avg_epu8(src, k, a, b) simde_mm_mask_avg_epu8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_avg_epu8(simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_maskz_avg_epu8(k, a, b); - #else - return simde_mm_maskz_mov_epi8(k, simde_mm_avg_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_avg_epu8 - #define _mm_maskz_avg_epu8(k, a, b) simde_mm_maskz_avg_epu8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_avg_epu16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_avg_epu16(src, k, a, b); - #else - return simde_mm_mask_mov_epi16(src, k, simde_mm_avg_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_avg_epu16 - #define _mm_mask_avg_epu16(src, k, a, b) simde_mm_mask_avg_epu16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_avg_epu16(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_maskz_avg_epu16(k, a, b); - #else - return simde_mm_maskz_mov_epi16(k, simde_mm_avg_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_avg_epu16 - #define _mm_maskz_avg_epu16(k, a, b) simde_mm_maskz_avg_epu16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_avg_epu8(simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) { - #if 
defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_avg_epu8(src, k, a, b); - #else - return simde_mm256_mask_mov_epi8(src, k, simde_mm256_avg_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_avg_epu8 - #define _mm256_mask_avg_epu8(src, k, a, b) simde_mm256_mask_avg_epu8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_avg_epu8(simde__mmask32 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_maskz_avg_epu8(k, a, b); - #else - return simde_mm256_maskz_mov_epi8(k, simde_mm256_avg_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_avg_epu8 - #define _mm256_maskz_avg_epu8(k, a, b) simde_mm256_maskz_avg_epu8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_avg_epu16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_avg_epu16(src, k, a, b); - #else - return simde_mm256_mask_mov_epi16(src, k, simde_mm256_avg_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_avg_epu16 - #define _mm256_mask_avg_epu16(src, k, a, b) simde_mm256_mask_avg_epu16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_avg_epu16(simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_maskz_avg_epu16(k, a, b); - #else - return simde_mm256_maskz_mov_epi16(k, simde_mm256_avg_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_avg_epu16 - #define _mm256_maskz_avg_epu16(k, a, b) simde_mm256_maskz_avg_epu16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_avg_epu8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_avg_epu8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_avg_epu8 - #define _mm512_avg_epu8(a, b) simde_mm512_avg_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_avg_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_avg_epu8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_avg_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_avg_epu8 - #define _mm512_mask_avg_epu8(src, k, a, b) simde_mm512_mask_avg_epu8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_avg_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_avg_epu8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_avg_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_avg_epu8 - #define _mm512_maskz_avg_epu8(k, a, b) simde_mm512_maskz_avg_epu8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m512i -simde_mm512_avg_epu16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_avg_epu16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_avg_epu16 - #define _mm512_avg_epu16(a, b) simde_mm512_avg_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_avg_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_avg_epu16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_avg_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_avg_epu16 - #define _mm512_mask_avg_epu16(src, k, a, b) simde_mm512_mask_avg_epu16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_avg_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_avg_epu16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_avg_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_avg_epu16 - #define _mm512_maskz_avg_epu16(k, a, b) simde_mm512_maskz_avg_epu16(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_AVG_H) */ -/* :: End simde/x86/avx512/avg.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/bitshuffle.h :: */ -#if !defined(SIMDE_X86_AVX512_BITSHUFFLE_H) -#define SIMDE_X86_AVX512_BITSHUFFLE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_bitshuffle_epi64_mask (simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_bitshuffle_epi64_mask(b, c); - #else - simde__m128i_private - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - simde__mmask16 r = 0; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - __typeof__(b_.u64) rv = { 0, 0 }; - __typeof__(b_.u64) lshift = { 0, 8 }; - - for (int8_t i = 0 ; i < 8 ; i++) { - __typeof__(b_.u64) ct = (HEDLEY_REINTERPRET_CAST(__typeof__(ct), c_.u8) >> (i * 8)) & 63; - rv |= ((b_.u64 >> ct) & 1) << lshift; - lshift += 1; - } - - r = - HEDLEY_STATIC_CAST(simde__mmask16, rv[0]) | - HEDLEY_STATIC_CAST(simde__mmask16, rv[1]); - #else - for (size_t i = 0 ; i < (sizeof(c_.m64_private) / sizeof(c_.m64_private[0])) ; i++) { - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t j = 0 ; j < (sizeof(c_.m64_private[i].u8) / sizeof(c_.m64_private[i].u8[0])) ; j++) { - r |= (((b_.u64[i] >> (c_.m64_private[i].u8[j]) & 63) & 1) << ((i * 8) + j)); - } - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_bitshuffle_epi64_mask - #define _mm_bitshuffle_epi64_mask(b, c) simde_mm_bitshuffle_epi64_mask(b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 
-simde_mm_mask_bitshuffle_epi64_mask (simde__mmask16 k, simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_bitshuffle_epi64_mask(k, b, c); - #else - return (k & simde_mm_bitshuffle_epi64_mask(b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_bitshuffle_epi64_mask - #define _mm_mask_bitshuffle_epi64_mask(k, b, c) simde_mm_mask_bitshuffle_epi64_mask(k, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_bitshuffle_epi64_mask (simde__m256i b, simde__m256i c) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_bitshuffle_epi64_mask(b, c); - #else - simde__m256i_private - b_ = simde__m256i_to_private(b), - c_ = simde__m256i_to_private(c); - simde__mmask32 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < sizeof(b_.m128i) / sizeof(b_.m128i[0]) ; i++) { - r |= (HEDLEY_STATIC_CAST(simde__mmask32, simde_mm_bitshuffle_epi64_mask(b_.m128i[i], c_.m128i[i])) << (i * 16)); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - __typeof__(b_.u64) rv = { 0, 0, 0, 0 }; - __typeof__(b_.u64) lshift = { 0, 8, 16, 24 }; - - for (int8_t i = 0 ; i < 8 ; i++) { - __typeof__(b_.u64) ct = (HEDLEY_REINTERPRET_CAST(__typeof__(ct), c_.u8) >> (i * 8)) & 63; - rv |= ((b_.u64 >> ct) & 1) << lshift; - lshift += 1; - } - - r = - HEDLEY_STATIC_CAST(simde__mmask32, rv[0]) | - HEDLEY_STATIC_CAST(simde__mmask32, rv[1]) | - HEDLEY_STATIC_CAST(simde__mmask32, rv[2]) | - HEDLEY_STATIC_CAST(simde__mmask32, rv[3]); - #else - for (size_t i = 0 ; i < (sizeof(c_.m128i_private) / sizeof(c_.m128i_private[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(c_.m128i_private[i].m64_private) / sizeof(c_.m128i_private[i].m64_private[0])) ; j++) { - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t k = 0 ; k < (sizeof(c_.m128i_private[i].m64_private[j].u8) / sizeof(c_.m128i_private[i].m64_private[j].u8[0])) ; k++) { - r |= (((b_.m128i_private[i].u64[j] >> (c_.m128i_private[i].m64_private[j].u8[k]) & 63) & 1) << ((i * 16) + (j * 8) + k)); - } - } - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_bitshuffle_epi64_mask - #define _mm256_bitshuffle_epi64_mask(b, c) simde_mm256_bitshuffle_epi64_mask(b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_mask_bitshuffle_epi64_mask (simde__mmask32 k, simde__m256i b, simde__m256i c) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_bitshuffle_epi64_mask(k, b, c); - #else - return (k & simde_mm256_bitshuffle_epi64_mask(b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_bitshuffle_epi64_mask - #define _mm256_mask_bitshuffle_epi64_mask(k, b, c) simde_mm256_mask_bitshuffle_epi64_mask(k, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_bitshuffle_epi64_mask (simde__m512i b, simde__m512i c) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) - return _mm512_bitshuffle_epi64_mask(b, c); - #else - simde__m512i_private - b_ = simde__m512i_to_private(b), - c_ = simde__m512i_to_private(c); - simde__mmask64 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(b_.m128i) / sizeof(b_.m128i[0])) ; i++) { - r |= 
(HEDLEY_STATIC_CAST(simde__mmask64, simde_mm_bitshuffle_epi64_mask(b_.m128i[i], c_.m128i[i])) << (i * 16)); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(b_.m256i) / sizeof(b_.m256i[0])) ; i++) { - r |= (HEDLEY_STATIC_CAST(simde__mmask64, simde_mm256_bitshuffle_epi64_mask(b_.m256i[i], c_.m256i[i])) << (i * 32)); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - __typeof__(b_.u64) rv = { 0, 0, 0, 0, 0, 0, 0, 0 }; - __typeof__(b_.u64) lshift = { 0, 8, 16, 24, 32, 40, 48, 56 }; - - for (int8_t i = 0 ; i < 8 ; i++) { - __typeof__(b_.u64) ct = (HEDLEY_REINTERPRET_CAST(__typeof__(ct), c_.u8) >> (i * 8)) & 63; - rv |= ((b_.u64 >> ct) & 1) << lshift; - lshift += 1; - } - - r = - HEDLEY_STATIC_CAST(simde__mmask64, rv[0]) | - HEDLEY_STATIC_CAST(simde__mmask64, rv[1]) | - HEDLEY_STATIC_CAST(simde__mmask64, rv[2]) | - HEDLEY_STATIC_CAST(simde__mmask64, rv[3]) | - HEDLEY_STATIC_CAST(simde__mmask64, rv[4]) | - HEDLEY_STATIC_CAST(simde__mmask64, rv[5]) | - HEDLEY_STATIC_CAST(simde__mmask64, rv[6]) | - HEDLEY_STATIC_CAST(simde__mmask64, rv[7]); - #else - for (size_t i = 0 ; i < (sizeof(c_.m128i_private) / sizeof(c_.m128i_private[0])) ; i++) { - for (size_t j = 0 ; j < (sizeof(c_.m128i_private[i].m64_private) / sizeof(c_.m128i_private[i].m64_private[0])) ; j++) { - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t k = 0 ; k < (sizeof(c_.m128i_private[i].m64_private[j].u8) / sizeof(c_.m128i_private[i].m64_private[j].u8[0])) ; k++) { - r |= (((b_.m128i_private[i].u64[j] >> (c_.m128i_private[i].m64_private[j].u8[k]) & 63) & 1) << ((i * 16) + (j * 8) + k)); - } - } - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) - #undef _mm512_bitshuffle_epi64_mask - #define _mm512_bitshuffle_epi64_mask(b, c) simde_mm512_bitshuffle_epi64_mask(b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_mask_bitshuffle_epi64_mask (simde__mmask64 k, simde__m512i b, simde__m512i c) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) - return _mm512_mask_bitshuffle_epi64_mask(k, b, c); - #else - return (k & simde_mm512_bitshuffle_epi64_mask(b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_bitshuffle_epi64_mask - #define _mm512_mask_bitshuffle_epi64_mask(k, b, c) simde_mm512_mask_bitshuffle_epi64_mask(k, b, c) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_BITSHUFFLE_H) */ -/* :: End simde/x86/avx512/bitshuffle.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/blend.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_BLEND_H) -#define SIMDE_X86_AVX512_BLEND_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_blend_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_blend_epi8(k, a, b); - #else - return simde_mm_mask_mov_epi8(a, k, b); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_blend_epi8 - #define _mm_mask_blend_epi8(k, a, b) simde_mm_mask_blend_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_blend_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_blend_epi16(k, a, b); - #else - return simde_mm_mask_mov_epi16(a, k, b); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_blend_epi16 - #define _mm_mask_blend_epi16(k, a, b) simde_mm_mask_blend_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_blend_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_blend_epi32(k, a, b); - #else - return simde_mm_mask_mov_epi32(a, k, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_blend_epi32 - #define _mm_mask_blend_epi32(k, a, b) simde_mm_mask_blend_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_blend_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_blend_epi64(k, a, b); - #else - return simde_mm_mask_mov_epi64(a, k, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_blend_epi64 - #define _mm_mask_blend_epi64(k, a, b) simde_mm_mask_blend_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_blend_ps(simde__mmask8 k, simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_blend_ps(k, a, b); - #else - return simde_mm_mask_mov_ps(a, k, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_blend_ps - #define _mm_mask_blend_ps(k, a, b) simde_mm_mask_blend_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mask_blend_pd(simde__mmask8 k, simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_blend_pd(k, a, b); - #else - return simde_mm_mask_mov_pd(a, k, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_blend_pd - #define _mm_mask_blend_pd(k, a, b) simde_mm_mask_blend_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_blend_epi8(simde__mmask32 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return 
[Vendored SIMDE removal, condensed: the lines deleted in this span are unmodified upstream code from the bundled single-header SIMDE AVX-512 shim. They cover the remainder of simde/x86/avx512/blend.h (the 256-/512-bit _mm*_mask_blend_epi8/epi16/epi32/epi64/ps/pd fallbacks), all of broadcast.h (broadcast_f32x2/f32x4/f32x8/f64x2/f64x4, broadcast_i32x4/i64x4, broadcastd/q/b/w, broadcastss/sd and their mask/maskz variants), mov_mask.h (movepi8/16/32/64_mask for 128-, 256-, and 512-bit vectors), setzero.h, and setone.h, together with each sub-header's repeated MIT license text and "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY" banners, plus the opening of cmp.h (cmp_epi8_mask, cmp_epi32_mask, cmp_ps_mask, and the start of cmp_pd_mask). The deletion of cmp.h continues below.]
~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m512d_to_private(simde_mm512_setzero_pd()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m512d_to_private(simde_x_mm512_setone_pd()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(r_))); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_cmp_pd_mask(a, b, imm8) _mm512_cmp_pd_mask((a), (b), (imm8)) -#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm512_cmp_pd_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d_private \ - simde_mm512_cmp_pd_mask_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ - simde_mm512_cmp_pd_mask_a_ = simde__m512d_to_private((a)), \ - simde_mm512_cmp_pd_mask_b_ = simde__m512d_to_private((b)); \ - \ - for (size_t simde_mm512_cmp_pd_mask_i = 0 ; simde_mm512_cmp_pd_mask_i < (sizeof(simde_mm512_cmp_pd_mask_r_.m128d) / sizeof(simde_mm512_cmp_pd_mask_r_.m128d[0])) ; simde_mm512_cmp_pd_mask_i++) { \ - simde_mm512_cmp_pd_mask_r_.m128d[simde_mm512_cmp_pd_mask_i] = simde_mm_cmp_pd(simde_mm512_cmp_pd_mask_a_.m128d[simde_mm512_cmp_pd_mask_i], simde_mm512_cmp_pd_mask_b_.m128d[simde_mm512_cmp_pd_mask_i], (imm8)); \ - } \ - \ - simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(simde_mm512_cmp_pd_mask_r_))); \ - })) -#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(256) - #define simde_mm512_cmp_pd_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d_private \ - simde_mm512_cmp_pd_mask_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ - simde_mm512_cmp_pd_mask_a_ = simde__m512d_to_private((a)), \ - simde_mm512_cmp_pd_mask_b_ = simde__m512d_to_private((b)); \ - \ - for (size_t simde_mm512_cmp_pd_mask_i = 0 ; simde_mm512_cmp_pd_mask_i < (sizeof(simde_mm512_cmp_pd_mask_r_.m256d) / sizeof(simde_mm512_cmp_pd_mask_r_.m256d[0])) ; simde_mm512_cmp_pd_mask_i++) { \ - simde_mm512_cmp_pd_mask_r_.m256d[simde_mm512_cmp_pd_mask_i] = simde_mm256_cmp_pd(simde_mm512_cmp_pd_mask_a_.m256d[simde_mm512_cmp_pd_mask_i], simde_mm512_cmp_pd_mask_b_.m256d[simde_mm512_cmp_pd_mask_i], (imm8)); \ - } \ - \ - simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(simde_mm512_cmp_pd_mask_r_))); \ - })) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_pd_mask - #define _mm512_cmp_pd_mask(a, b, imm8) simde_mm512_cmp_pd_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_cmp_pd_mask(a, b, imm8) _mm256_cmp_pd_mask((a), (b), (imm8)) -#else - #define simde_mm256_cmp_pd_mask(a, b, imm8) simde_mm256_movepi64_mask(simde_mm256_castpd_si256(simde_mm256_cmp_pd((a), (b), (imm8)))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_pd_mask - #define _mm256_cmp_pd_mask(a, b, imm8) simde_mm256_cmp_pd_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_cmp_pd_mask(a, b, imm8) _mm_cmp_pd_mask((a), (b), (imm8)) -#else - #define simde_mm_cmp_pd_mask(a, b, imm8) simde_mm_movepi64_mask(simde_mm_castpd_si128(simde_mm_cmp_pd((a), (b), (imm8)))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_pd_mask - #define _mm_cmp_pd_mask(a, b, imm8) simde_mm_cmp_pd_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES 
-simde__mmask32 -simde_mm512_cmp_ph_mask (simde__m512h a, simde__m512h b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m512h_private - r_, - a_ = simde__m512h_to_private(a), - b_ = simde__m512h_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 == b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - simde_float16_as_uint16(a_.f16[i]) == simde_float16_as_uint16(b_.f16[i]) - && !simde_isnanhf(a_.f16[i]) && !simde_isnanhf(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 < b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = (simde_float16_to_float32(a_.f16[i]) < simde_float16_to_float32(b_.f16[i])) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 <= b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = (simde_float16_to_float32(a_.f16[i]) <= simde_float16_to_float32(b_.f16[i])) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 != a_.f16) | (b_.f16 != b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - (simde_float16_to_float32(a_.f16[i]) != simde_float16_to_float32(a_.f16[i])) - || (simde_float16_to_float32(b_.f16[i]) != simde_float16_to_float32(b_.f16[i])) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 != b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - (simde_float16_as_uint16(a_.f16[i]) != simde_float16_as_uint16(b_.f16[i])) - || simde_isnanhf(a_.f16[i]) || simde_isnanhf(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 == a_.f16) & (b_.f16 == b_.f16) & (a_.f16 != b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - !(simde_isnanhf(a_.f16[i]) || simde_isnanhf(b_.f16[i])) - && (simde_float16_as_uint16(a_.f16[i]) != simde_float16_as_uint16(b_.f16[i])) - ) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ~(a_.f16 < b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = !( - simde_float16_to_float32(a_.f16[i]) < simde_float16_to_float32(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ~(a_.f16 <= b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = !( - simde_float16_to_float32(a_.f16[i]) <= simde_float16_to_float32(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ((a_.f16 == a_.f16) & (b_.f16 == b_.f16))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = (simde_isnanhf(a_.f16[i]) || simde_isnanhf(b_.f16[i])) ? INT16_C(0) : ~INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 != a_.f16) | (b_.f16 != b_.f16) | (a_.f16 == b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - (simde_isnanhf(a_.f16[i]) || simde_isnanhf(b_.f16[i])) - || (simde_float16_as_uint16(a_.f16[i]) == simde_float16_as_uint16(b_.f16[i])) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ~(a_.f16 >= b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = !( - simde_float16_to_float32(a_.f16[i]) >= simde_float16_to_float32(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ~(a_.f16 > b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = !( - simde_float16_to_float32(a_.f16[i]) > simde_float16_to_float32(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m512h_to_private(simde_mm512_setzero_ph()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 >= b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - simde_float16_to_float32(a_.f16[i]) >= simde_float16_to_float32(b_.f16[i]) - ) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 > b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - simde_float16_to_float32(a_.f16[i]) > simde_float16_to_float32(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m512h_to_private(simde_x_mm512_setone_ph()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi16_mask(simde_mm512_castph_si512(simde__m512h_from_private(r_))); -} -#if defined(SIMDE_X86_AVX512FP16_NATIVE) - #define simde_mm512_cmp_ph_mask(a, b, imm8) _mm512_cmp_ph_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_ph_mask - #define _mm512_cmp_ph_mask(a, b, imm8) simde_mm512_cmp_ph_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_cmp_epi16_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 == b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 <= b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] <= b_.i16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 != b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] != b_.i16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ~(a_.i16 < b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = !(a_.i16[i] < b_.i16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ~(a_.i16 <= b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = !(a_.i16[i] <= b_.i16[i]) ? 
~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi16_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_cmp_epi16_mask(a, b, imm8) _mm512_cmp_epi16_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epi16_mask - #define _mm512_cmp_epi16_mask(a, b, imm8) simde_mm512_cmp_epi16_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_mask_cmp_epi16_mask(k1, a, b, imm8) _mm512_mask_cmp_epi16_mask(k1, a, b, imm8) -#else - #define simde_mm512_mask_cmp_epi16_mask(k1, a, b, imm8) (k1) & simde_mm512_cmp_epi16_mask(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmp_epi16_mask -#define _mm512_mask_cmp_epi16_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epi16_mask((k1), (a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmp_epi32_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 == b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 <= b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 != b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] != b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = !(a_.i32[i] < b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.i32 <= b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = !(a_.i32[i] <= b_.i32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_cmp_epi32_mask(a, b, imm8) _mm512_cmp_epi32_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epi32_mask - #define _mm512_cmp_epi32_mask(a, b, imm8) simde_mm512_cmp_epi32_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmp_epi64_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.i64 == b_.i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.i64 < b_.i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] < b_.i64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.i64 <= b_.i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] <= b_.i64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.i64 != b_.i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] != b_.i64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.i64 < b_.i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = !(a_.i64[i] < b_.i64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.i64 <= b_.i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = !(a_.i64[i] <= b_.i64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_cmp_epi64_mask(a, b, imm8) _mm512_cmp_epi64_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epi64_mask - #define _mm512_cmp_epi64_mask(a, b, imm8) simde_mm512_cmp_epi64_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_cmp_epu16_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 == b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] == b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 < b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 <= b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 != b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] != b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), ~(a_.u16 < b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = !(a_.u16[i] < b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), ~(a_.u16 <= b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = !(a_.u16[i] <= b_.u16[i]) ? 
~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi16_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_cmp_epu16_mask(a, b, imm8) _mm512_cmp_epu16_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epu16_mask - #define _mm512_cmp_epu16_mask(a, b, imm8) simde_mm512_cmp_epu16_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_mask_cmp_epu16_mask(k1, a, b, imm8) _mm512_mask_cmp_epu16_mask(k1, a, b, imm8) -#else - #define simde_mm512_mask_cmp_epu16_mask(k1, a, b, imm8) (k1) & simde_mm512_cmp_epu16_mask(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmp_epu16_mask -#define _mm512_mask_cmp_epu16_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epu16_mask((k1), (a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmp_epu32_mask (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 == b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] == b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 < b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 <= b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 != b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] != b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), ~(a_.u32 < b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = !(a_.u32[i] < b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), ~(a_.u32 <= b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = !(a_.u32[i] <= b_.u32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m256i_to_private(simde_x_mm256_setone_si256()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm256_movepi32_mask(simde__m256i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_cmp_epu32_mask(a, b, imm8) _mm256_cmp_epu32_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_epu32_mask - #define _mm256_cmp_epu32_mask(a, b, imm8) simde_mm256_cmp_epu32_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_mask_cmp_epu32_mask(k1, a, b, imm8) _mm256_mask_cmp_epu32_mask(k1, a, b, imm8) -#else - #define simde_mm256_mask_cmp_epu32_mask(k1, a, b, imm8) (k1) & simde_mm256_cmp_epu32_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmp_epu32_mask -#define _mm256_mask_cmp_epu32_mask(a, b, imm8) simde_mm256_mask_cmp_epu32_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmp_epu32_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 == b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] == b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 < b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 <= b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 != b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] != b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), ~(a_.u32 < b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = !(a_.u32[i] < b_.u32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), ~(a_.u32 <= b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = !(a_.u32[i] <= b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_cmp_epu32_mask(a, b, imm8) _mm512_cmp_epu32_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epu32_mask - #define _mm512_cmp_epu32_mask(a, b, imm8) simde_mm512_cmp_epu32_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_cmp_epu32_mask(k1, a, b, imm8) _mm512_mask_cmp_epu32_mask(k1, a, b, imm8) -#else - #define simde_mm512_mask_cmp_epu32_mask(k1, a, b, imm8) (k1) & simde_mm512_cmp_epu32_mask(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmp_epu32_mask -#define _mm512_mask_cmp_epu32_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epu32_mask((k1), (a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmp_epu64_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (a_.u64 == b_.u64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (a_.u64 < b_.u64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] < b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (a_.u64 <= b_.u64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] <= b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (a_.u64 != b_.u64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] != b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), ~(a_.u64 < b_.u64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = !(a_.u64[i] < b_.u64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), ~(a_.u64 <= b_.u64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = !(a_.u64[i] <= b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_cmp_epu64_mask(a, b, imm8) _mm512_cmp_epu64_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epu64_mask - #define _mm512_cmp_epu64_mask(a, b, imm8) simde_mm512_cmp_epu64_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_cmp_epu64_mask(k1, a, b, imm8) _mm512_mask_cmp_epu64_mask(k1, a, b, imm8) -#else - #define simde_mm512_mask_cmp_epu64_mask(k1, a, b, imm8) (k1) & simde_mm512_cmp_epu64_mask(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmp_epu64_mask -#define _mm512_mask_cmp_epu64_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epu64_mask((k1), (a), (b), (imm8)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_CMP_H) */ -/* :: End simde/x86/avx512/cmp.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/cmpeq.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020-2021 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_CMPEQ_H) -#define SIMDE_X86_AVX512_CMPEQ_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmpeq_epi8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpeq_epi8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - const uint32_t t = HEDLEY_STATIC_CAST(uint32_t, simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[i], b_.m256i[i]))); - r |= HEDLEY_STATIC_CAST(uint64_t, t) << (i * 32); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.i8 == b_.i8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[i] == b_.u8[i]) ? (UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpeq_epi8_mask - #define _mm512_cmpeq_epi8_mask(a, b) simde_mm512_cmpeq_epi8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_mask_cmpeq_epi8_mask(simde__mmask64 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_cmpeq_epi8_mask(k1, a, b); - #else - return simde_mm512_cmpeq_epi8_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpeq_epi8_mask - #define _mm512_mask_cmpeq_epi8_mask(k1, a, b) simde_mm512_mask_cmpeq_epi8_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpeq_epi32_mask(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_cmpeq_epi32(a_.m256i[i], b_.m256i[i]); - } - - return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpeq_epi32_mask - #define _mm512_cmpeq_epi32_mask(a, b) simde_mm512_cmpeq_epi32_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_mask_cmpeq_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpeq_epi32_mask(k1, a, b); - #else - return simde_mm512_cmpeq_epi32_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpeq_epi32_mask - 
#define _mm512_mask_cmpeq_epi32_mask(k1, a, b) simde_mm512_mask_cmpeq_epi32_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmpeq_epi64_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpeq_epi64_mask(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_cmpeq_epi64(a_.m256i[i], b_.m256i[i]); - } - - return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpeq_epi64_mask - #define _mm512_cmpeq_epi64_mask(a, b) simde_mm512_cmpeq_epi64_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_mask_cmpeq_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpeq_epi64_mask(k1, a, b); - #else - return simde_mm512_cmpeq_epi64_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpeq_epi64_mask - #define _mm512_mask_cmpeq_epi64_mask(k1, a, b) simde_mm512_mask_cmpeq_epi64_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_cmpeq_epu16_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpeq_epu16_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask32 r; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.u16), a_.u16 == b_.u16); - r = simde_mm512_movepi16_mask(simde__m512i_from_private(tmp)); - #else - r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { - r |= (a_.u16[i] == b_.u16[i]) ? 
[Elided: contiguous deletion of the bundled SIMDe AVX-512 compatibility headers, removed verbatim (every line prefixed `-`). The hunk drops the remainder of simde/x86/avx512/cmpeq.h (the _mm512_cmpeq_epu16_mask, _mm512_mask_cmpeq_epu16_mask, _mm512_cmpeq_ps_mask, and _mm512_cmpeq_pd_mask shims), all of simde/x86/avx512/cmpge.h together with its inlined copy of simde/x86/avx512/movm.h (portable _mm/_mm256/_mm512_movm_epi8/16/32/64 plus signed and unsigned _mm*_cmpge_*_mask fallbacks with native, NEON, WASM SIMD128, AltiVec/z-vector, and scalar code paths), and the opening of simde/x86/avx512/cmpgt.h (_mm512_cmpgt_epi8_mask and the beginning of _mm512_cmpgt_epu8_mask, whose deletion continues below). All of it is auto-generated, MIT-licensed SIMDe code; the full header contents are not reproduced here.]
(UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpgt_epu8_mask - #define _mm512_cmpgt_epu8_mask(a, b) simde_mm512_cmpgt_epu8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_cmpgt_epi16_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpgt_epi16_mask(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_cmpgt_epi16(a_.m256i[i], b_.m256i[i]); - } - - return simde_mm512_movepi16_mask(simde__m512i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpgt_epi16_mask - #define _mm512_cmpgt_epi16_mask(a, b) simde_mm512_cmpgt_epi16_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmpgt_epi32_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpgt_epi32_mask(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_cmpgt_epi32(a_.m256i[i], b_.m256i[i]); - } - - return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpgt_epi32_mask - #define _mm512_cmpgt_epi32_mask(a, b) simde_mm512_cmpgt_epi32_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpgt_epi32_mask(k1, a, b); - #else - return simde_mm512_cmpgt_epi32_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpgt_epi32_mask - #define _mm512_mask_cmpgt_epi32_mask(k1, a, b) simde_mm512_mask_cmpgt_epi32_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmpgt_epi64_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpgt_epi64_mask(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_cmpgt_epi64(a_.m256i[i], b_.m256i[i]); - } - - return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpgt_epi64_mask - #define _mm512_cmpgt_epi64_mask(a, b) simde_mm512_cmpgt_epi64_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpgt_epi64_mask(k1, a, b); - #else - return simde_mm512_cmpgt_epi64_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpgt_epi64_mask - #define _mm512_mask_cmpgt_epi64_mask(k1, a, b) simde_mm512_mask_cmpgt_epi64_mask(k1, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_CMPGT_H) */ -/* :: End simde/x86/avx512/cmpgt.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin 
simde/x86/avx512/cmple.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020-2021 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_CMPLE_H) -#define SIMDE_X86_AVX512_CMPLE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmple_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_movm_epi8(_mm_cmple_epi8_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcleq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmple(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 <= b_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] <= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_cmple_epi8_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cmple_epi8_mask(a, b); - #else - return simde_mm_movepi8_mask(simde_x_mm_cmple_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_cmple_epi8_mask - #define _mm_cmple_epi8_mask(a, b) simde_mm_cmple_epi8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_mask_cmple_epi8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_cmple_epi8_mask(k, a, b); - #else - return k & simde_mm_cmple_epi8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmple_epi8_mask - #define _mm_mask_cmple_epi8_mask(k, a, b) simde_mm_mask_cmple_epi8_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmple_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm256_movm_epi8(_mm256_cmple_epi8_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epi8(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 <= b_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] <= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_cmple_epi8_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cmple_epi8_mask(a, b); - #else - return simde_mm256_movepi8_mask(simde_x_mm256_cmple_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmple_epi8_mask - #define _mm256_cmple_epi8_mask(a, b) simde_mm256_cmple_epi8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_mask_cmple_epi8_mask(simde__mmask32 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_cmple_epi8_mask(k, a, b); - #else - return k & simde_mm256_cmple_epi8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmple_epi8_mask - #define _mm256_mask_cmple_epi8_mask(k, a, b) simde_mm256_mask_cmple_epi8_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmple_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm512_movm_epi8(_mm512_cmple_epi8_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epi8(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmple_epi8(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 <= b_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] <= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmple_epi8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmple_epi8_mask(a, b); - #else - return simde_mm512_movepi8_mask(simde_x_mm512_cmple_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epi8_mask - #define _mm512_cmple_epi8_mask(a, b) simde_mm512_cmple_epi8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_mask_cmple_epi8_mask(simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_cmple_epi8_mask(k, a, b); - #else - return k & simde_mm512_cmple_epi8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmple_epi8_mask - #define _mm512_mask_cmple_epi8_mask(k, a, b) simde_mm512_mask_cmple_epi8_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmple_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_movm_epi8(_mm_cmple_epu8_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcleq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmple(a_.altivec_u8, b_.altivec_u8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 <= b_.u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] <= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_cmple_epu8_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cmple_epu8_mask(a, b); - #else - return simde_mm_movepi8_mask(simde_x_mm_cmple_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_cmple_epu8_mask - #define _mm_cmple_epu8_mask(a, b) simde_mm_cmple_epu8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_mask_cmple_epu8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_cmple_epu8_mask(k, a, b); - #else - return k & simde_mm_cmple_epu8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmple_epu8_mask - #define _mm_mask_cmple_epu8_mask(k, a, b) simde_mm_mask_cmple_epu8_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmple_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm256_movm_epi8(_mm256_cmple_epu8_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epu8(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 <= b_.u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] <= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_cmple_epu8_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cmple_epu8_mask(a, b); - #else - return simde_mm256_movepi8_mask(simde_x_mm256_cmple_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmple_epu8_mask - #define _mm256_cmple_epu8_mask(a, b) simde_mm256_cmple_epu8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_mask_cmple_epu8_mask(simde__mmask32 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_cmple_epu8_mask(k, a, b); - #else - return k & simde_mm256_cmple_epu8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmple_epu8_mask - #define _mm256_mask_cmple_epu8_mask(k, a, b) simde_mm256_mask_cmple_epu8_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmple_epu8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm512_movm_epi8(_mm512_cmple_epu8_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epu8(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmple_epu8(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 <= b_.u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] <= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmple_epu8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmple_epu8_mask(a, b); - #else - return simde_mm512_movepi8_mask(simde_x_mm512_cmple_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epu8_mask - #define _mm512_cmple_epu8_mask(a, b) simde_mm512_cmple_epu8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_mask_cmple_epu8_mask(simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_cmple_epu8_mask(k, a, b); - #else - return k & simde_mm512_cmple_epu8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmple_epu8_mask - #define _mm512_mask_cmple_epu8_mask(k, a, b) simde_mm512_mask_cmple_epu8_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmple_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_movm_epi16(_mm_cmple_epi16_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcleq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmple(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 <= b_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] <= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmple_epi16_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cmple_epi16_mask(a, b); - #else - return simde_mm_movepi16_mask(simde_x_mm_cmple_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_cmple_epi16_mask - #define _mm_cmple_epi16_mask(a, b) simde_mm_cmple_epi16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmple_epi16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_cmple_epi16_mask(k, a, b); - #else - return k & simde_mm_cmple_epi16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmple_epi16_mask - #define _mm_mask_cmple_epi16_mask(k, a, b) simde_mm_mask_cmple_epi16_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmple_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm256_movm_epi16(_mm256_cmple_epi16_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epi16(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 <= b_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] <= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_cmple_epi16_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cmple_epi16_mask(a, b); - #else - return simde_mm256_movepi16_mask(simde_x_mm256_cmple_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmple_epi16_mask - #define _mm256_cmple_epi16_mask(a, b) simde_mm256_cmple_epi16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_mask_cmple_epi16_mask(simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_cmple_epi16_mask(k, a, b); - #else - return k & simde_mm256_cmple_epi16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmple_epi16_mask - #define _mm256_mask_cmple_epi16_mask(k, a, b) simde_mm256_mask_cmple_epi16_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmple_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm512_movm_epi16(_mm512_cmple_epi16_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epi16(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmple_epi16(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 <= b_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] <= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_cmple_epi16_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmple_epi16_mask(a, b); - #else - return simde_mm512_movepi16_mask(simde_x_mm512_cmple_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epi16_mask - #define _mm512_cmple_epi16_mask(a, b) simde_mm512_cmple_epi16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_mask_cmple_epi16_mask(simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_cmple_epi16_mask(k, a, b); - #else - return k & simde_mm512_cmple_epi16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmple_epi16_mask - #define _mm512_mask_cmple_epi16_mask(k, a, b) simde_mm512_mask_cmple_epi16_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmple_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_movm_epi16(_mm_cmple_epu16_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcleq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmple(a_.altivec_u16, b_.altivec_u16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 <= b_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmple_epu16_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cmple_epu16_mask(a, b); - #else - return simde_mm_movepi16_mask(simde_x_mm_cmple_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_cmple_epu16_mask - #define _mm_cmple_epu16_mask(a, b) simde_mm_cmple_epu16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmple_epu16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_cmple_epu16_mask(k, a, b); - #else - return k & simde_mm_cmple_epu16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmple_epu16_mask - #define _mm_mask_cmple_epu16_mask(k, a, b) simde_mm_mask_cmple_epu16_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmple_epu16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm256_movm_epi16(_mm256_cmple_epu16_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epu16(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 <= b_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_cmple_epu16_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cmple_epu16_mask(a, b); - #else - return simde_mm256_movepi16_mask(simde_x_mm256_cmple_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmple_epu16_mask - #define _mm256_cmple_epu16_mask(a, b) simde_mm256_cmple_epu16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_mask_cmple_epu16_mask(simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_cmple_epu16_mask(k, a, b); - #else - return k & simde_mm256_cmple_epu16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmple_epu16_mask - #define _mm256_mask_cmple_epu16_mask(k, a, b) simde_mm256_mask_cmple_epu16_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmple_epu16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm512_movm_epi16(_mm512_cmple_epu16_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epu16(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmple_epu16(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 <= b_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_cmple_epu16_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmple_epu16_mask(a, b); - #else - return simde_mm512_movepi16_mask(simde_x_mm512_cmple_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epu16_mask - #define _mm512_cmple_epu16_mask(a, b) simde_mm512_cmple_epu16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_mask_cmple_epu16_mask(simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_cmple_epu16_mask(k, a, b); - #else - return k & simde_mm512_cmple_epu16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmple_epu16_mask - #define _mm512_mask_cmple_epu16_mask(k, a, b) simde_mm512_mask_cmple_epu16_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmple_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm_movm_epi32(_mm_cmple_epi32_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcleq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmple(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 <= b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmple_epi32_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_cmple_epi32_mask(a, b); - #else - return simde_mm_movepi32_mask(simde_x_mm_cmple_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmple_epi32_mask - #define _mm_cmple_epi32_mask(a, b) simde_mm_cmple_epi32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmple_epi32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_cmple_epi32_mask(k, a, b); - #else - return k & simde_mm_cmple_epi32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmple_epi32_mask - #define _mm_mask_cmple_epi32_mask(k, a, b) simde_mm_mask_cmple_epi32_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmple_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm256_movm_epi32(_mm256_cmple_epi32_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epi32(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 <= b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmple_epi32_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_cmple_epi32_mask(a, b); - #else - return simde_mm256_movepi32_mask(simde_x_mm256_cmple_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmple_epi32_mask - #define _mm256_cmple_epi32_mask(a, b) simde_mm256_cmple_epi32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_mask_cmple_epi32_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_cmple_epi32_mask(k, a, b); - #else - return k & simde_mm256_cmple_epi32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmple_epi32_mask - #define _mm256_mask_cmple_epi32_mask(k, a, b) simde_mm256_mask_cmple_epi32_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmple_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return simde_mm512_movm_epi32(_mm512_cmple_epi32_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epi32(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmple_epi32(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = 
HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 <= b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmple_epi32_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmple_epi32_mask(a, b); - #else - return simde_mm512_movepi32_mask(simde_x_mm512_cmple_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epi32_mask - #define _mm512_cmple_epi32_mask(a, b) simde_mm512_cmple_epi32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_mask_cmple_epi32_mask(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmple_epi32_mask(k, a, b); - #else - return k & simde_mm512_cmple_epi32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmple_epi32_mask - #define _mm512_mask_cmple_epi32_mask(k, a, b) simde_mm512_mask_cmple_epi32_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmple_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm_movm_epi32(_mm_cmple_epu32_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcleq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a_.altivec_u32, b_.altivec_u32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 <= b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmple_epu32_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_cmple_epu32_mask(a, b); - #else - return simde_mm_movepi32_mask(simde_x_mm_cmple_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmple_epu32_mask - #define _mm_cmple_epu32_mask(a, b) simde_mm_cmple_epu32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmple_epu32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_cmple_epu32_mask(k, a, b); - #else - return k & simde_mm_cmple_epu32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmple_epu32_mask - #define _mm_mask_cmple_epu32_mask(k, a, b) simde_mm_mask_cmple_epu32_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmple_epu32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm256_movm_epi32(_mm256_cmple_epu32_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epu32(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 <= b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmple_epu32_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_cmple_epu32_mask(a, b); - #else - return simde_mm256_movepi32_mask(simde_x_mm256_cmple_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmple_epu32_mask - #define _mm256_cmple_epu32_mask(a, b) simde_mm256_cmple_epu32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_mask_cmple_epu32_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_cmple_epu32_mask(k, a, b); - #else - return k & simde_mm256_cmple_epu32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmple_epu32_mask - #define _mm256_mask_cmple_epu32_mask(k, a, b) simde_mm256_mask_cmple_epu32_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmple_epu32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return simde_mm512_movm_epi32(_mm512_cmple_epu32_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epu32(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmple_epu32(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = 
HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 <= b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmple_epu32_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmple_epu32_mask(a, b); - #else - return simde_mm512_movepi32_mask(simde_x_mm512_cmple_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epu32_mask - #define _mm512_cmple_epu32_mask(a, b) simde_mm512_cmple_epu32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_mask_cmple_epu32_mask(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmple_epu32_mask(k, a, b); - #else - return k & simde_mm512_cmple_epu32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmple_epu32_mask - #define _mm512_mask_cmple_epu32_mask(k, a, b) simde_mm512_mask_cmple_epu32_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmple_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm_movm_epi64(_mm_cmple_epi64_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcleq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmple(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 <= b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] <= b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmple_epi64_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_cmple_epi64_mask(a, b); - #else - return simde_mm_movepi64_mask(simde_x_mm_cmple_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmple_epi64_mask - #define _mm_cmple_epi64_mask(a, b) simde_mm_cmple_epi64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmple_epi64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_cmple_epi64_mask(k, a, b); - #else - return k & simde_mm_cmple_epi64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmple_epi64_mask - #define _mm_mask_cmple_epi64_mask(k, a, b) simde_mm_mask_cmple_epi64_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmple_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm256_movm_epi64(_mm256_cmple_epi64_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epi64(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 <= b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] <= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmple_epi64_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_cmple_epi64_mask(a, b); - #else - return simde_mm256_movepi64_mask(simde_x_mm256_cmple_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmple_epi64_mask - #define _mm256_cmple_epi64_mask(a, b) simde_mm256_cmple_epi64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_mask_cmple_epi64_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_cmple_epi64_mask(k, a, b); - #else - return k & simde_mm256_cmple_epi64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmple_epi64_mask - #define _mm256_mask_cmple_epi64_mask(k, a, b) simde_mm256_mask_cmple_epi64_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmple_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return simde_mm512_movm_epi64(_mm512_cmple_epi64_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epi64(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmple_epi64(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = 
HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 <= b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] <= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmple_epi64_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmple_epi64_mask(a, b); - #else - return simde_mm512_movepi64_mask(simde_x_mm512_cmple_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epi64_mask - #define _mm512_cmple_epi64_mask(a, b) simde_mm512_cmple_epi64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_mask_cmple_epi64_mask(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmple_epi64_mask(k, a, b); - #else - return k & simde_mm512_cmple_epi64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmple_epi64_mask - #define _mm512_mask_cmple_epi64_mask(k, a, b) simde_mm512_mask_cmple_epi64_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmple_epu64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm_movm_epi64(_mm_cmple_epu64_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcleq_u64(a_.neon_u64, b_.neon_u64); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a_.altivec_u64, b_.altivec_u64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 <= b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] <= b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmple_epu64_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_cmple_epu64_mask(a, b); - #else - return simde_mm_movepi64_mask(simde_x_mm_cmple_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmple_epu64_mask - #define _mm_cmple_epu64_mask(a, b) simde_mm_cmple_epu64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmple_epu64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_cmple_epu64_mask(k, a, b); - #else - return k & simde_mm_cmple_epu64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmple_epu64_mask - #define _mm_mask_cmple_epu64_mask(k, a, b) simde_mm_mask_cmple_epu64_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmple_epu64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm256_movm_epi64(_mm256_cmple_epu64_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epu64(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 <= b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] <= b_.u64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmple_epu64_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_cmple_epu64_mask(a, b); - #else - return simde_mm256_movepi64_mask(simde_x_mm256_cmple_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmple_epu64_mask - #define _mm256_cmple_epu64_mask(a, b) simde_mm256_cmple_epu64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_mask_cmple_epu64_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_cmple_epu64_mask(k, a, b); - #else - return k & simde_mm256_cmple_epu64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmple_epu64_mask - #define _mm256_mask_cmple_epu64_mask(k, a, b) simde_mm256_mask_cmple_epu64_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmple_epu64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return simde_mm512_movm_epi64(_mm512_cmple_epu64_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmple_epu64(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmple_epu64(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = 
HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 <= b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] <= b_.u64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmple_epu64_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmple_epu64_mask(a, b); - #else - return simde_mm512_movepi64_mask(simde_x_mm512_cmple_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epu64_mask - #define _mm512_cmple_epu64_mask(a, b) simde_mm512_cmple_epu64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_mask_cmple_epu64_mask(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmple_epu64_mask(k, a, b); - #else - return k & simde_mm512_cmple_epu64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmple_epu64_mask - #define _mm512_mask_cmple_epu64_mask(k, a, b) simde_mm512_mask_cmple_epu64_mask((k), (a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_CMPLE_H) */ -/* :: End simde/x86/avx512/cmple.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/cmplt.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_CMPLT_H) -#define SIMDE_X86_AVX512_CMPLT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmplt_ps_mask (simde__m512 a, simde__m512 b) { - return simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmplt_ps_mask - #define _mm512_cmplt_ps_mask(a, b) simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmplt_pd_mask (simde__m512d a, simde__m512d b) { - return simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_LT_OQ); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmplt_pd_mask - #define _mm512_cmplt_pd_mask(a, b) simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_LT_OQ) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmplt_epi8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmplt_epi8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r = 0; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.i8 < b_.i8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] < b_.i8[i]) ? (UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmplt_epi8_mask - #define _mm512_cmplt_epi8_mask(a, b) simde_mm512_cmplt_epi8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmplt_epu8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmplt_epu8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r = 0; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.u8 < b_.u8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[i] < b_.u8[i]) ? 
(UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmplt_epu8_mask - #define _mm512_cmplt_epu8_mask(a, b) simde_mm512_cmplt_epu8_mask(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_CMPLT_H) */ -/* :: End simde/x86/avx512/cmplt.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/cmpneq.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_CMPNEQ_H) -#define SIMDE_X86_AVX512_CMPNEQ_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_cmpneq_epi8_mask(simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cmpneq_epi8_mask(a, b); - #else - return ~simde_mm_movepi8_mask(simde_mm_cmpeq_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpneq_epi8_mask - #define _mm_cmpneq_epi8_mask(a, b) simde_mm_cmpneq_epi8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_mask_cmpneq_epi8_mask(simde__mmask16 k1, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_cmpneq_epi8_mask(k1, a, b); - #else - return simde_mm_cmpneq_epi8_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpneq_epi8_mask - #define _mm_mask_cmpneq_epi8_mask(k1, a, b) simde_mm_mask_cmpneq_epi8_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_cmpneq_epu8_mask(simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cmpneq_epu8_mask(a, 
b); - #else - return simde_mm_cmpneq_epi8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpneq_epu8_mask - #define _mm_cmpneq_epu8_mask(a, b) simde_mm_cmpneq_epu8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_mask_cmpneq_epu8_mask(simde__mmask16 k1, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_cmpneq_epu8_mask(k1, a, b); - #else - return simde_mm_mask_cmpneq_epi8_mask(k1, a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpneq_epu8_mask - #define _mm_mask_cmpneq_epu8_mask(k1, a, b) simde_mm_mask_cmpneq_epu8_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmpneq_epi16_mask(simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cmpneq_epi16_mask(a, b); - #else - return ~simde_mm_movepi16_mask(simde_mm_cmpeq_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpneq_epi16_mask - #define _mm_cmpneq_epi16_mask(a, b) simde_mm_cmpneq_epi16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmpneq_epi16_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_cmpneq_epi16_mask(k1, a, b); - #else - return simde_mm_cmpneq_epi16_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpneq_epi16_mask - #define _mm_mask_cmpneq_epi16_mask(k1, a, b) simde_mm_mask_cmpneq_epi16_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmpneq_epu16_mask(simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cmpneq_epu16_mask(a, b); - #else - return simde_mm_cmpneq_epi16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpneq_epu16_mask - #define _mm_cmpneq_epu16_mask(a, b) simde_mm_cmpneq_epu16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmpneq_epu16_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_cmpneq_epu16_mask(k1, a, b); - #else - return simde_mm_mask_cmpneq_epi16_mask(k1, a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpneq_epu16_mask - #define _mm_mask_cmpneq_epu16_mask(k1, a, b) simde_mm_mask_cmpneq_epu16_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmpneq_epi32_mask(simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_cmpneq_epi32_mask(a, b); - #else - return (~simde_mm_movepi32_mask(simde_mm_cmpeq_epi32(a, b))) & 15; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpneq_epi32_mask - #define _mm_cmpneq_epi32_mask(a, b) simde_mm_cmpneq_epi32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 
-simde_mm_mask_cmpneq_epi32_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_cmpneq_epi32_mask(k1, a, b); - #else - return simde_mm_cmpneq_epi32_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpneq_epi32_mask - #define _mm_mask_cmpneq_epi32_mask(k1, a, b) simde_mm_mask_cmpneq_epi32_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmpneq_epu32_mask(simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_cmpneq_epu32_mask(a, b); - #else - return simde_mm_cmpneq_epi32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpneq_epu32_mask - #define _mm_cmpneq_epu32_mask(a, b) simde_mm_cmpneq_epu32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmpneq_epu32_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_cmpneq_epu32_mask(k1, a, b); - #else - return simde_mm_mask_cmpneq_epi32_mask(k1, a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpneq_epu32_mask - #define _mm_mask_cmpneq_epu32_mask(k1, a, b) simde_mm_mask_cmpneq_epu32_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmpneq_epi64_mask(simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_cmpneq_epi64_mask(a, b); - #else - return (~simde_mm_movepi64_mask(simde_mm_cmpeq_epi64(a, b))) & 3; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpneq_epi64_mask - #define _mm_cmpneq_epi64_mask(a, b) simde_mm_cmpneq_epi64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmpneq_epi64_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_cmpneq_epi64_mask(k1, a, b); - #else - return simde_mm_cmpneq_epi64_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpneq_epi64_mask - #define _mm_mask_cmpneq_epi64_mask(k1, a, b) simde_mm_mask_cmpneq_epi64_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmpneq_epu64_mask(simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_cmpneq_epu64_mask(a, b); - #else - return simde_mm_cmpneq_epi64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpneq_epu64_mask - #define _mm_cmpneq_epu64_mask(a, b) simde_mm_cmpneq_epu64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmpneq_epu64_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_cmpneq_epu64_mask(k1, a, b); - #else - return simde_mm_mask_cmpneq_epi64_mask(k1, a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpneq_epu64_mask - #define _mm_mask_cmpneq_epu64_mask(k1, a, b) simde_mm_mask_cmpneq_epu64_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_cmpneq_epi8_mask(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cmpneq_epi8_mask(a, b); - #else - return ~simde_mm256_movepi8_mask(simde_mm256_cmpeq_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || 
defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpneq_epi8_mask - #define _mm256_cmpneq_epi8_mask(a, b) simde_mm256_cmpneq_epi8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_mask_cmpneq_epi8_mask(simde__mmask32 k1, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_cmpneq_epi8_mask(k1, a, b); - #else - return simde_mm256_cmpneq_epi8_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpneq_epi8_mask - #define _mm256_mask_cmpneq_epi8_mask(k1, a, b) simde_mm256_mask_cmpneq_epi8_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_cmpneq_epu8_mask(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cmpneq_epu8_mask(a, b); - #else - return simde_mm256_cmpneq_epi8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpneq_epu8_mask - #define _mm256_cmpneq_epu8_mask(a, b) simde_mm256_cmpneq_epu8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_mask_cmpneq_epu8_mask(simde__mmask32 k1, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_cmpneq_epu8_mask(k1, a, b); - #else - return simde_mm256_mask_cmpneq_epi8_mask(k1, a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpneq_epu8_mask - #define _mm256_mask_cmpneq_epu8_mask(k1, a, b) simde_mm256_mask_cmpneq_epu8_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_cmpneq_epi16_mask(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cmpneq_epi16_mask(a, b); - #else - return ~simde_mm256_movepi16_mask(simde_mm256_cmpeq_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpneq_epi16_mask - #define _mm256_cmpneq_epi16_mask(a, b) simde_mm256_cmpneq_epi16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_mask_cmpneq_epi16_mask(simde__mmask16 k1, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_cmpneq_epi16_mask(k1, a, b); - #else - return simde_mm256_cmpneq_epi16_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpneq_epi16_mask - #define _mm256_mask_cmpneq_epi16_mask(k1, a, b) simde_mm256_mask_cmpneq_epi16_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_cmpneq_epu16_mask(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cmpneq_epu16_mask(a, b); - #else - return simde_mm256_cmpneq_epi16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpneq_epu16_mask - #define _mm256_cmpneq_epu16_mask(a, b) simde_mm256_cmpneq_epu16_mask((a), (b)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_mask_cmpneq_epu16_mask(simde__mmask16 k1, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_cmpneq_epu16_mask(k1, a, b); - #else - return simde_mm256_mask_cmpneq_epi16_mask(k1, a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpneq_epu16_mask - #define _mm256_mask_cmpneq_epu16_mask(k1, a, b) simde_mm256_mask_cmpneq_epu16_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmpneq_epi32_mask(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_cmpneq_epi32_mask(a, b); - #else - return (~simde_mm256_movepi32_mask(simde_mm256_cmpeq_epi32(a, b))); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpneq_epi32_mask - #define _mm256_cmpneq_epi32_mask(a, b) simde_mm256_cmpneq_epi32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_mask_cmpneq_epi32_mask(simde__mmask8 k1, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_cmpneq_epi32_mask(k1, a, b); - #else - return simde_mm256_cmpneq_epi32_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpneq_epi32_mask - #define _mm256_mask_cmpneq_epi32_mask(k1, a, b) simde_mm256_mask_cmpneq_epi32_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmpneq_epu32_mask(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_cmpneq_epu32_mask(a, b); - #else - return simde_mm256_cmpneq_epi32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpneq_epu32_mask - #define _mm256_cmpneq_epu32_mask(a, b) simde_mm256_cmpneq_epu32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_mask_cmpneq_epu32_mask(simde__mmask8 k1, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_cmpneq_epu32_mask(k1, a, b); - #else - return simde_mm256_mask_cmpneq_epi32_mask(k1, a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpneq_epu32_mask - #define _mm256_mask_cmpneq_epu32_mask(k1, a, b) simde_mm256_mask_cmpneq_epu32_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmpneq_epi64_mask(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_cmpneq_epi64_mask(a, b); - #else - return (~simde_mm256_movepi64_mask(simde_mm256_cmpeq_epi64(a, b))) & 15; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpneq_epi64_mask - #define _mm256_cmpneq_epi64_mask(a, b) simde_mm256_cmpneq_epi64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_mask_cmpneq_epi64_mask(simde__mmask8 k1, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_cmpneq_epi64_mask(k1, a, b); - #else - return simde_mm256_cmpneq_epi64_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpneq_epi64_mask - #define _mm256_mask_cmpneq_epi64_mask(k1, a, b) simde_mm256_mask_cmpneq_epi64_mask((k1), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmpneq_epu64_mask(simde__m256i a, simde__m256i b) { 
- #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_cmpneq_epu64_mask(a, b); - #else - return simde_mm256_cmpneq_epi64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpneq_epu64_mask - #define _mm256_cmpneq_epu64_mask(a, b) simde_mm256_cmpneq_epu64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_mask_cmpneq_epu64_mask(simde__mmask8 k1, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_cmpneq_epu64_mask(k1, a, b); - #else - return simde_mm256_mask_cmpneq_epi64_mask(k1, a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpneq_epu64_mask - #define _mm256_mask_cmpneq_epu64_mask(k1, a, b) simde_mm256_mask_cmpneq_epu64_mask((k1), (a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_CMPNEQ_H) */ -/* :: End simde/x86/avx512/cmpneq.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/compress.h :: */ -#if !defined(SIMDE_X86_AVX512_COMPRESS_H) -#define SIMDE_X86_AVX512_COMPRESS_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_compress_pd (simde__m256d src, simde__mmask8 k, simde__m256d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm256_mask_compress_pd(src, k, a); - #else - simde__m256d_private - a_ = simde__m256d_to_private(a), - src_ = simde__m256d_to_private(src); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if ((k >> i) & 1) { - a_.f64[ri++] = a_.f64[i]; - } - } - - for ( ; ri < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; ri++) { - a_.f64[ri] = src_.f64[ri]; - } - - return simde__m256d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_compress_pd - #define _mm256_mask_compress_pd(src, k, a) simde_mm256_mask_compress_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_mask_compressstoreu_pd (void* base_addr, simde__mmask8 k, simde__m256d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) - _mm256_mask_compressstoreu_pd(base_addr, k, a); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) - simde__mmask8 store_mask = _pext_u32(-1, k); - _mm256_mask_storeu_pd(base_addr, store_mask, _mm256_maskz_compress_pd(k, a)); - #else - simde__m256d_private - a_ = simde__m256d_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if ((k >> i) & 1) { - a_.f64[ri++] = a_.f64[i]; - } - } - - simde_memcpy(base_addr, &a_, ri * sizeof(a_.f64[0])); - - return; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_compressstoreu_pd - #define _mm256_mask_compressstoreu_pd(base_addr, k, a) simde_mm256_mask_compressstoreu_pd(base_addr, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskz_compress_pd (simde__mmask8 k, simde__m256d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - 
return _mm256_maskz_compress_pd(k, a); - #else - simde__m256d_private - a_ = simde__m256d_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if ((k >> i) & 1) { - a_.f64[ri++] = a_.f64[i]; - } - } - - for ( ; ri < (sizeof(a_.f64) / sizeof(a_.f64[0])); ri++) { - a_.f64[ri] = SIMDE_FLOAT64_C(0.0); - } - - return simde__m256d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_compress_pd - #define _mm256_maskz_compress_pd(k, a) simde_mm256_maskz_compress_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_compress_ps (simde__m256 src, simde__mmask8 k, simde__m256 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm256_mask_compress_ps(src, k, a); - #else - simde__m256_private - a_ = simde__m256_to_private(a), - src_ = simde__m256_to_private(src); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if ((k >> i) & 1) { - a_.f32[ri++] = a_.f32[i]; - } - } - - for ( ; ri < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; ri++) { - a_.f32[ri] = src_.f32[ri]; - } - - return simde__m256_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_compress_ps - #define _mm256_mask_compress_ps(src, k, a) simde_mm256_mask_compress_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_mask_compressstoreu_ps (void* base_addr, simde__mmask8 k, simde__m256 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) - _mm256_mask_compressstoreu_ps(base_addr, k, a); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) - simde__mmask8 store_mask = _pext_u32(-1, k); - _mm256_mask_storeu_ps(base_addr, store_mask, _mm256_maskz_compress_ps(k, a)); - #else - simde__m256_private - a_ = simde__m256_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if ((k >> i) & 1) { - a_.f32[ri++] = a_.f32[i]; - } - } - - simde_memcpy(base_addr, &a_, ri * sizeof(a_.f32[0])); - - return; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_compressstoreu_ps - #define _mm256_mask_compressstoreu_ps(base_addr, k, a) simde_mm256_mask_compressstoreu_ps(base_addr, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskz_compress_ps (simde__mmask8 k, simde__m256 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm256_maskz_compress_ps(k, a); - #else - simde__m256_private - a_ = simde__m256_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if ((k >> i) & 1) { - a_.f32[ri++] = a_.f32[i]; - } - } - - for ( ; ri < (sizeof(a_.f32) / sizeof(a_.f32[0])); ri++) { - a_.f32[ri] = SIMDE_FLOAT32_C(0.0); - } - - return simde__m256_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_compress_ps - #define _mm256_maskz_compress_ps(k, a) simde_mm256_maskz_compress_ps(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_compress_epi32 
(simde__m256i src, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm256_mask_compress_epi32(src, k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - src_ = simde__m256i_to_private(src); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - if ((k >> i) & 1) { - a_.i32[ri++] = a_.i32[i]; - } - } - - for ( ; ri < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; ri++) { - a_.i32[ri] = src_.i32[ri]; - } - - return simde__m256i_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_compress_epi32 - #define _mm256_mask_compress_epi32(src, k, a) simde_mm256_mask_compress_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_mask_compressstoreu_epi32 (void* base_addr, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) - _mm256_mask_compressstoreu_epi32(base_addr, k, a); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) - simde__mmask8 store_mask = _pext_u32(-1, k); - _mm256_mask_storeu_epi32(base_addr, store_mask, _mm256_maskz_compress_epi32(k, a)); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - if ((k >> i) & 1) { - a_.i32[ri++] = a_.i32[i]; - } - } - - simde_memcpy(base_addr, &a_, ri * sizeof(a_.i32[0])); - - return; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_compressstoreu_epi32 - #define _mm256_mask_compressstoreu_epi32(base_addr, k, a) simde_mm256_mask_compressstoreu_epi32(base_addr, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_compress_epi32 (simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm256_maskz_compress_epi32(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - if ((k >> i) & 1) { - a_.i32[ri++] = a_.i32[i]; - } - } - - for ( ; ri < (sizeof(a_.i32) / sizeof(a_.i32[0])); ri++) { - a_.f32[ri] = INT32_C(0); - } - - return simde__m256i_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_compress_epi32 - #define _mm256_maskz_compress_epi32(k, a) simde_mm256_maskz_compress_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_compress_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm256_mask_compress_epi64(src, k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - src_ = simde__m256i_to_private(src); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - if ((k >> i) & 1) { - a_.i64[ri++] = a_.i64[i]; - } - } - - for ( ; ri < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; ri++) { - a_.i64[ri] = src_.i64[ri]; - } - - return simde__m256i_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_compress_epi64 - #define _mm256_mask_compress_epi64(src, k, a) simde_mm256_mask_compress_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_mask_compressstoreu_epi64 (void* base_addr, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) - _mm256_mask_compressstoreu_epi64(base_addr, k, a); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) - simde__mmask8 store_mask = _pext_u32(-1, k); - _mm256_mask_storeu_epi64(base_addr, store_mask, _mm256_maskz_compress_epi64(k, a)); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - if ((k >> i) & 1) { - a_.i64[ri++] = a_.i64[i]; - } - } - - simde_memcpy(base_addr, &a_, ri * sizeof(a_.i64[0])); - - return; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_compressstoreu_epi64 - #define _mm256_mask_compressstoreu_epi64(base_addr, k, a) simde_mm256_mask_compressstoreu_epi64(base_addr, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_compress_epi64 (simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm256_maskz_compress_epi64(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - if ((k >> i) & 1) { - a_.i64[ri++] = a_.i64[i]; - } - } - - for ( ; ri < (sizeof(a_.i64) / sizeof(a_.i64[0])); ri++) { - a_.i64[ri] = INT64_C(0); - } - - return simde__m256i_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_compress_epi64 - #define _mm256_maskz_compress_epi64(k, a) simde_mm256_maskz_compress_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_compress_pd (simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_compress_pd(src, k, a); - #else - simde__m512d_private - a_ = simde__m512d_to_private(a), - src_ = simde__m512d_to_private(src); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if ((k >> i) & 1) { - a_.f64[ri++] = a_.f64[i]; - } - } - - for ( ; ri < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; ri++) { - a_.f64[ri] = src_.f64[ri]; - } - - return simde__m512d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_compress_pd - #define _mm512_mask_compress_pd(src, k, a) simde_mm512_mask_compress_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_mask_compressstoreu_pd (void* base_addr, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) - _mm512_mask_compressstoreu_pd(base_addr, k, a); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) - simde__mmask8 store_mask = _pext_u32(-1, k); - _mm512_mask_storeu_pd(base_addr, store_mask, _mm512_maskz_compress_pd(k, a)); - 
#else - simde__m512d_private - a_ = simde__m512d_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if ((k >> i) & 1) { - a_.f64[ri++] = a_.f64[i]; - } - } - - simde_memcpy(base_addr, &a_, ri * sizeof(a_.f64[0])); - - return; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_compressstoreu_pd - #define _mm512_mask_compressstoreu_pd(base_addr, k, a) simde_mm512_mask_compressstoreu_pd(base_addr, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_compress_pd (simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_compress_pd(k, a); - #else - simde__m512d_private - a_ = simde__m512d_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if ((k >> i) & 1) { - a_.f64[ri++] = a_.f64[i]; - } - } - - for ( ; ri < (sizeof(a_.f64) / sizeof(a_.f64[0])); ri++) { - a_.f64[ri] = SIMDE_FLOAT64_C(0.0); - } - - return simde__m512d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_compress_pd - #define _mm512_maskz_compress_pd(k, a) simde_mm512_maskz_compress_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_compress_ps (simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_compress_ps(src, k, a); - #else - simde__m512_private - a_ = simde__m512_to_private(a), - src_ = simde__m512_to_private(src); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if ((k >> i) & 1) { - a_.f32[ri++] = a_.f32[i]; - } - } - - for ( ; ri < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; ri++) { - a_.f32[ri] = src_.f32[ri]; - } - - return simde__m512_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_compress_ps - #define _mm512_mask_compress_ps(src, k, a) simde_mm512_mask_compress_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_mask_compressstoreu_ps (void* base_addr, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) - _mm512_mask_compressstoreu_ps(base_addr, k, a); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) - simde__mmask16 store_mask = _pext_u32(-1, k); - _mm512_mask_storeu_ps(base_addr, store_mask, _mm512_maskz_compress_ps(k, a)); - #else - simde__m512_private - a_ = simde__m512_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if ((k >> i) & 1) { - a_.f32[ri++] = a_.f32[i]; - } - } - - simde_memcpy(base_addr, &a_, ri * sizeof(a_.f32[0])); - - return; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_compressstoreu_ps - #define _mm512_mask_compressstoreu_ps(base_addr, k, a) simde_mm512_mask_compressstoreu_ps(base_addr, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_compress_ps (simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) 
&& defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_compress_ps(k, a); - #else - simde__m512_private - a_ = simde__m512_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if ((k >> i) & 1) { - a_.f32[ri++] = a_.f32[i]; - } - } - - for ( ; ri < (sizeof(a_.f32) / sizeof(a_.f32[0])); ri++) { - a_.f32[ri] = SIMDE_FLOAT32_C(0.0); - } - - return simde__m512_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_compress_ps - #define _mm512_maskz_compress_ps(k, a) simde_mm512_maskz_compress_ps(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_compress_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_compress_epi32(src, k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - src_ = simde__m512i_to_private(src); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - if ((k >> i) & 1) { - a_.i32[ri++] = a_.i32[i]; - } - } - - for ( ; ri < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; ri++) { - a_.i32[ri] = src_.i32[ri]; - } - - return simde__m512i_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_compress_epi32 - #define _mm512_mask_compress_epi32(src, k, a) simde_mm512_mask_compress_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_mask_compressstoreu_epi16 (void* base_addr, simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(__znver4__) - _mm512_mask_compressstoreu_epi16(base_addr, k, a); - #elif defined(SIMDE_X86_AVX512VBMI2_NATIVE) && defined(__znver4__) - simde__mmask32 store_mask = _pext_u32(-1, k); - _mm512_mask_storeu_epi16(base_addr, store_mask, _mm512_maskz_compress_epi16(k, a)); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - if ((k >> i) & 1) { - a_.i16[ri++] = a_.i16[i]; - } - } - - simde_memcpy(base_addr, &a_, ri * sizeof(a_.i16[0])); - - return; - #endif -} -#if defined(SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_compressstoreu_epi16 - #define _mm512_mask_compressstoreu_epi16(base_addr, k, a) simde_mm512_mask_compressstoreu_epi16(base_addr, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_mask_compressstoreu_epi32 (void* base_addr, simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) - _mm512_mask_compressstoreu_epi32(base_addr, k, a); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) - simde__mmask16 store_mask = _pext_u32(-1, k); - _mm512_mask_storeu_epi32(base_addr, store_mask, _mm512_maskz_compress_epi32(k, a)); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - if ((k >> i) & 1) { - a_.i32[ri++] = a_.i32[i]; - } - } - - simde_memcpy(base_addr, &a_, ri * sizeof(a_.i32[0])); - - return; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef 
_mm512_mask_compressstoreu_epi32 - #define _mm512_mask_compressstoreu_epi32(base_addr, k, a) simde_mm512_mask_compressstoreu_epi32(base_addr, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_compress_epi32 (simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_compress_epi32(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - if ((k >> i) & 1) { - a_.i32[ri++] = a_.i32[i]; - } - } - - for ( ; ri < (sizeof(a_.i32) / sizeof(a_.i32[0])); ri++) { - a_.f32[ri] = INT32_C(0); - } - - return simde__m512i_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_compress_epi32 - #define _mm512_maskz_compress_epi32(k, a) simde_mm512_maskz_compress_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_compress_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_compress_epi64(src, k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - src_ = simde__m512i_to_private(src); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - if ((k >> i) & 1) { - a_.i64[ri++] = a_.i64[i]; - } - } - - for ( ; ri < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; ri++) { - a_.i64[ri] = src_.i64[ri]; - } - - return simde__m512i_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_compress_epi64 - #define _mm512_mask_compress_epi64(src, k, a) simde_mm512_mask_compress_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_mask_compressstoreu_epi64 (void* base_addr, simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) - _mm512_mask_compressstoreu_epi64(base_addr, k, a); - #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) - simde__mmask8 store_mask = _pext_u32(-1, k); - _mm512_mask_storeu_epi64(base_addr, store_mask, _mm512_maskz_compress_epi64(k, a)); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - if ((k >> i) & 1) { - a_.i64[ri++] = a_.i64[i]; - } - } - - simde_memcpy(base_addr, &a_, ri * sizeof(a_.i64[0])); - - return; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_compressstoreu_epi64 - #define _mm512_mask_compressstoreu_epi64(base_addr, k, a) simde_mm512_mask_compressstoreu_epi64(base_addr, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_compress_epi64 (simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_compress_epi64(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a); - size_t ri = 0; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - if ((k >> i) & 1) { - a_.i64[ri++] = a_.i64[i]; - } - } - - for ( ; ri < (sizeof(a_.i64) / 
sizeof(a_.i64[0])); ri++) { - a_.i64[ri] = INT64_C(0); - } - - return simde__m512i_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_compress_epi64 - #define _mm512_maskz_compress_epi64(k, a) simde_mm512_maskz_compress_epi64(k, a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_COMPRESS_H) */ -/* :: End simde/x86/avx512/compress.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/conflict.h :: */ -#if !defined(SIMDE_X86_AVX512_CONFLICT_H) -#define SIMDE_X86_AVX512_CONFLICT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_conflict_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm_conflict_epi32(a); - #else - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()), - a_ = simde__m128i_to_private(a); - - for (size_t i = 1 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = - simde_mm_movemask_ps( - simde_mm_castsi128_ps( - simde_mm_cmpeq_epi32(simde_mm_set1_epi32(a_.i32[i]), a) - ) - ) & ((1 << i) - 1); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) - #undef _mm_conflict_epi32 - #define _mm_conflict_epi32(a) simde_mm_conflict_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_conflict_epi32 (simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm_mask_conflict_epi32(src, k, a); - #else - return simde_mm_mask_mov_epi32(src, k, simde_mm_conflict_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_conflict_epi32 - #define _mm_mask_conflict_epi32(src, k, a) simde_mm_mask_conflict_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_conflict_epi32 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm_maskz_conflict_epi32(k, a); - #else - return simde_mm_maskz_mov_epi32(k, simde_mm_conflict_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_conflict_epi32 - #define _mm_maskz_conflict_epi32(k, a) simde_mm_maskz_conflict_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_conflict_epi32 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm256_conflict_epi32(a); - #else - simde__m256i_private - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()), - a_ = simde__m256i_to_private(a); - - 
for (size_t i = 1 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = - simde_mm256_movemask_ps( - simde_mm256_castsi256_ps( - simde_mm256_cmpeq_epi32(simde_mm256_set1_epi32(a_.i32[i]), a) - ) - ) & ((1 << i) - 1); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) - #undef _mm256_conflict_epi32 - #define _mm256_conflict_epi32(a) simde_mm256_conflict_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_conflict_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm256_mask_conflict_epi32(src, k, a); - #else - return simde_mm256_mask_mov_epi32(src, k, simde_mm256_conflict_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_conflict_epi32 - #define _mm256_mask_conflict_epi32(src, k, a) simde_mm256_mask_conflict_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_conflict_epi32 (simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm256_maskz_conflict_epi32(k, a); - #else - return simde_mm256_maskz_mov_epi32(k, simde_mm256_conflict_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_conflict_epi32 - #define _mm256_maskz_conflict_epi32(k, a) simde_mm256_maskz_conflict_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_conflict_epi32 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm512_conflict_epi32(a); - #else - simde__m512i_private - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()), - a_ = simde__m512i_to_private(a); - - for (size_t i = 1 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = - HEDLEY_STATIC_CAST( - int32_t, - simde_mm512_cmpeq_epi32_mask(simde_mm512_set1_epi32(a_.i32[i]), a) - ) & ((1 << i) - 1); - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) - #undef _mm512_conflict_epi32 - #define _mm512_conflict_epi32(a) simde_mm512_conflict_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_conflict_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm512_mask_conflict_epi32(src, k, a); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_conflict_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_conflict_epi32 - #define _mm512_mask_conflict_epi32(src, k, a) simde_mm512_mask_conflict_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_conflict_epi32 (simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm512_maskz_conflict_epi32(k, a); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_conflict_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_conflict_epi32 - #define _mm512_maskz_conflict_epi32(k, a) simde_mm512_maskz_conflict_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_conflict_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) - return 
[... remainder of the deleted vendored simde/x86/avx512 headers omitted: the rest of conflict.h, followed by copysign.h, xor.h, cvt.h, cvtt.h, and the start of cvts.h together with its storeu.h and loadu.h includes. These deleted lines are repeated MIT license blocks and auto-generated SIMD fallback code, removed wholesale with the bundled SIMDE dependency. ...]
(INT16_MAX) - : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]))) : src_.i16[i]; - } - - simde_mm256_storeu_epi16(base_addr, simde__m256i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cvtsepi32_storeu_epi16 - #define _mm512_mask_cvtsepi32_storeu_epi16(base_addr, k, a) simde_mm512_mask_cvtsepi32_storeu_epi16(base_addr, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_maskz_cvtsepi32_epi16 (simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtsepi32_epi16(k, a); - #else - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i16[i] = ((k>>i) &1 ) ? - ((a_.i32[i] < INT16_MIN) - ? (INT16_MIN) - : ((a_.i32[i] > INT16_MAX) - ? (INT16_MAX) - : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]))) : INT16_C(0); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_cvtsepi32_epi16 - #define _mm512_maskz_cvtsepi32_epi16(k, a) simde_mm512_maskz_cvtsepi32_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_cvtsepi64_epi8 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtsepi64_epi8(a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i8[i] = - (a_.i64[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i64[i] > INT8_MAX) - ? (INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i64[i])); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtsepi64_epi8 - #define _mm512_cvtsepi64_epi8(a) simde_mm512_cvtsepi64_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_mask_cvtsepi64_epi8 (simde__m128i src, simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtsepi64_epi8(src, k, a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m128i_private src_ = simde__m128i_to_private(src); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i8[i] = ((k>>i) &1 ) ? - ((a_.i64[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i64[i] > INT8_MAX) - ? (INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i64[i]))) : src_.i8[i]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cvtsepi64_epi8 - #define _mm512_mask_cvtsepi64_epi8(src, k, a) simde_mm512_mask_cvtsepi64_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_maskz_cvtsepi64_epi8 (simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtsepi64_epi8(k, a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i8[i] = ((k>>i) &1 ) ? - ((a_.i64[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i64[i] > INT8_MAX) - ? 
(INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i64[i]))) : INT8_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_cvtsepi64_epi8 - #define _mm512_maskz_cvtsepi64_epi8(k, a) simde_mm512_maskz_cvtsepi64_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_cvtsepi64_epi16 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtsepi64_epi16(a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i16[i] = - (a_.i64[i] < INT16_MIN) - ? (INT16_MIN) - : ((a_.i64[i] > INT16_MAX) - ? (INT16_MAX) - : HEDLEY_STATIC_CAST(int16_t, a_.i64[i])); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtsepi64_epi16 - #define _mm512_cvtsepi64_epi16(a) simde_mm512_cvtsepi64_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_mask_cvtsepi64_epi16 (simde__m128i src, simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtsepi64_epi16(src, k, a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m128i_private src_ = simde__m128i_to_private(src); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i16[i] = ((k>>i) & 1) ? - ((a_.i64[i] < INT16_MIN) - ? (INT16_MIN) - : ((a_.i64[i] > INT16_MAX) - ? (INT16_MAX) - : HEDLEY_STATIC_CAST(int16_t, a_.i64[i]))) : src_.i16[i]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cvtsepi64_epi16 - #define _mm512_mask_cvtsepi64_epi16(src, k, a) simde_mm512_mask_cvtsepi64_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_maskz_cvtsepi64_epi16 (simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtsepi64_epi16(k, a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i16[i] = ((k>>i) & 1) ? - ((a_.i64[i] < INT16_MIN) - ? (INT16_MIN) - : ((a_.i64[i] > INT16_MAX) - ? (INT16_MAX) - : HEDLEY_STATIC_CAST(int16_t, a_.i64[i]))) : INT16_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_cvtsepi64_epi16 - #define _mm512_maskz_cvtsepi64_epi16(k, a) simde_mm512_maskz_cvtsepi64_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_cvtsepi64_epi32 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtsepi64_epi32(a); - #else - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i32[i] = - (a_.i64[i] < INT32_MIN) - ? (INT32_MIN) - : ((a_.i64[i] > INT32_MAX) - ? 
(INT32_MAX) - : HEDLEY_STATIC_CAST(int32_t, a_.i64[i])); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtsepi64_epi32 - #define _mm512_cvtsepi64_epi32(a) simde_mm512_cvtsepi64_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_mask_cvtsepi64_epi32 (simde__m256i src, simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtsepi64_epi32(src, k, a); - #else - simde__m256i_private r_; - simde__m256i_private src_ = simde__m256i_to_private(src); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i32[i] = ((k>>i) & 1) ? - ((a_.i64[i] < INT32_MIN) - ? (INT32_MIN) - : ((a_.i64[i] > INT32_MAX) - ? (INT32_MAX) - : HEDLEY_STATIC_CAST(int32_t, a_.i64[i]))) : src_.i32[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cvtsepi64_epi32 - #define _mm512_mask_cvtsepi64_epi32(src, k, a) simde_mm512_mask_cvtsepi64_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_maskz_cvtsepi64_epi32 (simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtsepi64_epi32(k, a); - #else - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i32[i] = ((k>>i) & 1) ? - ((a_.i64[i] < INT32_MIN) - ? (INT32_MIN) - : ((a_.i64[i] > INT32_MAX) - ? (INT32_MAX) - : HEDLEY_STATIC_CAST(int32_t, a_.i64[i]))) : INT32_C(0); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_cvtsepi64_epi32 - #define _mm512_maskz_cvtsepi64_epi32(k, a) simde_mm512_maskz_cvtsepi64_epi32(k, a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_CVTS_H) */ -/* :: End simde/x86/avx512/cvts.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/cvtus.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Michael R. 
Crusoe - */ - -#if !defined(SIMDE_X86_AVX512_CVTUS_H) -#define SIMDE_X86_AVX512_CVTUS_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_mask_cvtusepi32_storeu_epi8 (void* base_addr, simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - _mm512_mask_cvtusepi32_storeu_epi8(base_addr, k, a); - #else - simde__m256i_private r_ = simde__m256i_to_private(simde_mm256_loadu_epi8(base_addr)); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r_.i8[i] = ((k>>i) &1 ) ? - ((a_.u32[i] > UINT8_MAX) - ? (HEDLEY_STATIC_CAST(int8_t, UINT8_MAX)) - : HEDLEY_STATIC_CAST(int8_t, a_.u32[i])) : r_.i8[i]; - } - - simde_mm256_storeu_epi8(base_addr, simde__m256i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cvtusepi32_storeu_epi8 - #define _mm512_mask_cvtusepi32_storeu_epi8(base_addr, k, a) simde_mm512_mask_cvtusepi32_storeu_epi8((base_addr), (k), (a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_CVTUS_H) */ -/* :: End simde/x86/avx512/cvtus.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/dbsad.h :: */ -#if !defined(SIMDE_X86_AVX512_DBSAD_H) -#define SIMDE_X86_AVX512_DBSAD_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/shuffle.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Michael R. 
Crusoe - */ - -#if !defined(SIMDE_X86_AVX512_SHUFFLE_H) -#define SIMDE_X86_AVX512_SHUFFLE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/extract.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_EXTRACT_H) -#define SIMDE_X86_AVX512_EXTRACT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_extractf32x4_ps (simde__m256 a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256_private a_ = simde__m256_to_private(a); - - return a_.m128[imm8 & 1]; -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_extractf32x4_ps(a, imm8) _mm256_extractf32x4_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf32x4_ps - #define _mm256_extractf32x4_ps(a, imm8) simde_mm256_extractf32x4_ps((a), (imm8)) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm512_extractf32x4_ps (simde__m512 a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512_private a_ = simde__m512_to_private(a); - - /* GCC 6 generates an ICE */ - #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(7,0,0) - return a_.m128[imm8 & 3]; - #else - simde__m128_private r_; - const size_t offset = HEDLEY_STATIC_CAST(size_t, imm8 & 3) * (sizeof(r_.f32) / sizeof(r_.f32[0])); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i + offset]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - #define simde_mm512_extractf32x4_ps(a, imm8) 
_mm512_extractf32x4_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_extractf32x4_ps - #define _mm512_extractf32x4_ps(a, imm8) simde_mm512_extractf32x4_ps((a), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - #define simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) _mm512_mask_extractf32x4_ps(src, k, a, imm8) -#else - #define simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) simde_mm_mask_mov_ps((src), (k), simde_mm512_extractf32x4_ps((a), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extractf32x4_ps - #define _mm512_mask_extractf32x4_ps(src, k, a, imm8) simde_mm512_mask_extractf32x4_ps((src), (k), (a), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - #define simde_mm512_maskz_extractf32x4_ps(k, a, imm8) _mm512_maskz_extractf32x4_ps(k, a, imm8) -#else - #define simde_mm512_maskz_extractf32x4_ps(k, a, imm8) simde_mm_maskz_mov_ps((k), simde_mm512_extractf32x4_ps((a), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extractf32x4_ps - #define _mm512_maskz_extractf32x4_ps(k, a, imm8) simde_mm512_maskz_extractf32x4_ps((k), (a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm512_extractf32x8_ps (simde__m512 a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512_private a_ = simde__m512_to_private(a); - - return a_.m256[imm8 & 1]; -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_extractf32x8_ps(a, imm8) _mm512_extractf32x8_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_extractf32x8_ps - #define _mm512_extractf32x8_ps(a, imm8) simde_mm512_extractf32x8_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm512_extractf64x4_pd (simde__m512d a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512d_private a_ = simde__m512d_to_private(a); - - return a_.m256d[imm8 & 1]; -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - #define simde_mm512_extractf64x4_pd(a, imm8) _mm512_extractf64x4_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_extractf64x4_pd - #define _mm512_extractf64x4_pd(a, imm8) simde_mm512_extractf64x4_pd(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - #define simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) _mm512_mask_extractf64x4_pd(src, k, a, imm8) -#else - #define simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm512_extractf64x4_pd(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extractf64x4_pd - #define _mm512_mask_extractf64x4_pd(src, k, a, imm8) simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - #define simde_mm512_maskz_extractf64x4_pd(k, a, imm8) _mm512_maskz_extractf64x4_pd(k, a, imm8) -#else - #define simde_mm512_maskz_extractf64x4_pd(k, a, imm8) simde_mm256_maskz_mov_pd(k, simde_mm512_extractf64x4_pd(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extractf64x4_pd - #define _mm512_maskz_extractf64x4_pd(k, a, imm8) 
simde_mm512_maskz_extractf64x4_pd(k, a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_extracti32x4_epi32 (simde__m512i a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - return a_.m128i[imm8 & 3]; -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_extracti32x4_epi32(a, imm8) _mm512_extracti32x4_epi32(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_extracti32x4_epi32 - #define _mm512_extracti32x4_epi32(a, imm8) simde_mm512_extracti32x4_epi32(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) _mm512_mask_extracti32x4_epi32(src, k, a, imm8) -#else - #define simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) simde_mm_mask_mov_epi32(src, k, simde_mm512_extracti32x4_epi32(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extracti32x4_epi32 - #define _mm512_mask_extracti32x4_epi32(src, k, a, imm8) simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) _mm512_maskz_extracti32x4_epi32(k, a, imm8) -#else - #define simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) simde_mm_maskz_mov_epi32(k, simde_mm512_extracti32x4_epi32(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extracti32x4_epi32 - #define _mm512_maskz_extracti32x4_epi32(k, a, imm8) simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_extracti32x8_epi32 (simde__m512i a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - return a_.m256i[imm8 & 1]; -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_extracti32x8_epi32(a, imm8) _mm512_extracti32x8_epi32(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_extracti32x8_epi32 - #define _mm512_extracti32x8_epi32(a, imm8) simde_mm512_extracti32x8_epi32((a), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX51FDQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_mask_extracti32x8_epi32(src, k, a, imm8) _mm512_mask_extracti32x8_epi32(src, k, a, imm8) -#else - #define simde_mm512_mask_extracti32x8_epi32(src, k, a, imm8) simde_mm256_mask_mov_epi32((src), (k), simde_mm512_extracti32x8_epi32((a), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extracti32x8_epi32 - #define _mm512_mask_extracti32x8_epi32(src, k, a, imm8) simde_mm512_mask_extracti32x8_epi32((src), (k), (a), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_maskz_extracti32x8_epi32(k, a, imm8) _mm512_maskz_extracti32x8_epi32(k, a, imm8) -#else - #define 
simde_mm512_maskz_extracti32x8_epi32(k, a, imm8) simde_mm256_maskz_mov_epi32((k), simde_mm512_extracti32x8_epi32((a), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extracti32x8_epi32 - #define _mm512_maskz_extracti32x8_epi32(k, a, imm8) simde_mm512_maskz_extracti32x8_epi32((k), (a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_extracti64x4_epi64 (simde__m512i a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - return a_.m256i[imm8 & 1]; -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_extracti64x4_epi64(a, imm8) _mm512_extracti64x4_epi64(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_extracti64x4_epi64 - #define _mm512_extracti64x4_epi64(a, imm8) simde_mm512_extracti64x4_epi64((a), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) _mm512_mask_extracti64x4_epi64(src, k, a, imm8) -#else - #define simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) simde_mm256_mask_mov_epi64((src), (k), simde_mm512_extracti64x4_epi64((a), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extracti64x4_epi64 - #define _mm512_mask_extracti64x4_epi64(src, k, a, imm8) simde_mm512_mask_extracti64x4_epi64((src), (k), (a), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) _mm512_maskz_extracti64x4_epi64(k, a, imm8) -#else - #define simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) simde_mm256_maskz_mov_epi64((k), simde_mm512_extracti64x4_epi64((a), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extracti64x4_epi64 - #define _mm512_maskz_extracti64x4_epi64(k, a, imm8) simde_mm512_maskz_extracti64x4_epi64((k), (a), (imm8)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_EXTRACT_H) */ -/* :: End simde/x86/avx512/extract.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_shuffle_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_shuffle_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_shuffle_epi8(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] & 0x80) ? 
0 : a_.i8[(b_.i8[i] & 0x0f) + (i & 0x30)]; - } - #endif - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_shuffle_epi8 - #define _mm512_shuffle_epi8(a, b) simde_mm512_shuffle_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_shuffle_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_shuffle_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_shuffle_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_shuffle_epi8 - #define _mm512_mask_shuffle_epi8(src, k, a, b) simde_mm512_mask_shuffle_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_shuffle_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_shuffle_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_shuffle_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_shuffle_epi8 - #define _mm512_maskz_shuffle_epi8(k, a, b) simde_mm512_maskz_shuffle_epi8(k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) -# define simde_mm512_shuffle_epi32(a, imm8) _mm512_shuffle_epi32((a), (imm8)) -#elif defined(SIMDE_STATEMENT_EXPR_) -# define simde_mm512_shuffle_epi32(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512i_private simde_mm512_shuffle_epi32_r_, \ - simde_mm512_shuffle_epi32_a_ = simde__m512i_to_private((a)); \ - simde_mm512_shuffle_epi32_r_.m128i[0] = simde_mm_shuffle_epi32( \ - simde_mm512_shuffle_epi32_a_.m128i[0], (imm8)); \ - simde_mm512_shuffle_epi32_r_.m128i[1] = simde_mm_shuffle_epi32( \ - simde_mm512_shuffle_epi32_a_.m128i[1], (imm8)); \ - simde_mm512_shuffle_epi32_r_.m128i[2] = simde_mm_shuffle_epi32( \ - simde_mm512_shuffle_epi32_a_.m128i[2], (imm8)); \ - simde_mm512_shuffle_epi32_r_.m128i[3] = simde_mm_shuffle_epi32( \ - simde_mm512_shuffle_epi32_a_.m128i[3], (imm8)); \ - simde__m512i_from_private(simde_mm512_shuffle_epi32_r_); \ - })) -#else -# define simde_mm512_shuffle_epi32(a, imm8) \ - simde_x_mm512_set_m128i( \ - simde_mm_shuffle_epi32(simde_mm512_extracti32x4_epi32(a, 3), (imm8)), \ - simde_mm_shuffle_epi32(simde_mm512_extracti32x4_epi32(a, 2), (imm8)), \ - simde_mm_shuffle_epi32(simde_mm512_extracti32x4_epi32(a, 1), (imm8)), \ - simde_mm_shuffle_epi32(simde_mm512_extracti32x4_epi32(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_shuffle_epi32 - #define _mm512_shuffle_epi32(a, imm8) simde_mm512_shuffle_epi32((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_shuffle_i32x4 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - r_.m128i[0] = a_.m128i[ imm8 & 1]; - r_.m128i[1] = b_.m128i[(imm8 >> 1) & 1]; - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_shuffle_i32x4(a, b, imm8) _mm256_shuffle_i32x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_i32x4 - #define _mm256_shuffle_i32x4(a, b, imm8) simde_mm256_shuffle_i32x4(a, b, imm8) -#endif - -#define 
simde_mm256_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm256_maskz_mov_epi32(k, simde_mm256_shuffle_i32x4(a, b, imm8)) -#define simde_mm256_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm256_mask_mov_epi32(src, k, simde_mm256_shuffle_i32x4(a, b, imm8)) - -#define simde_mm256_shuffle_f32x4(a, b, imm8) simde_mm256_castsi256_ps(simde_mm256_shuffle_i32x4(simde_mm256_castps_si256(a), simde_mm256_castps_si256(b), imm8)) -#define simde_mm256_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm256_maskz_mov_ps(k, simde_mm256_shuffle_f32x4(a, b, imm8)) -#define simde_mm256_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm256_mask_mov_ps(src, k, simde_mm256_shuffle_f32x4(a, b, imm8)) - -#define simde_mm256_shuffle_i64x2(a, b, imm8) simde_mm256_shuffle_i32x4(a, b, imm8) -#define simde_mm256_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm256_shuffle_i64x2(a, b, imm8)) -#define simde_mm256_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm256_shuffle_i64x2(a, b, imm8)) - -#define simde_mm256_shuffle_f64x2(a, b, imm8) simde_mm256_castsi256_pd(simde_mm256_shuffle_i64x2(simde_mm256_castpd_si256(a), simde_mm256_castpd_si256(b), imm8)) -#define simde_mm256_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm256_maskz_mov_pd(k, simde_mm256_shuffle_f64x2(a, b, imm8)) -#define simde_mm256_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm256_shuffle_f64x2(a, b, imm8)) - -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_shuffle_i32x4 - #undef _mm256_mask_shuffle_i32x4 - #define _mm256_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm256_maskz_shuffle_i32x4(k, a, b, imm8) - #define _mm256_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm256_mask_shuffle_i32x4(src, k, a, b, imm8) - - #undef _mm256_shuffle_f32x4 - #undef _mm256_maskz_shuffle_f32x4 - #undef _mm256_mask_shuffle_f32x4 - #define _mm256_shuffle_f32x4(a, b, imm8) simde_mm256_shuffle_f32x4(a, b, imm8) - #define _mm256_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm256_maskz_shuffle_f32x4(k, a, b, imm8) - #define _mm256_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm256_mask_shuffle_f32x4(src, k, a, b, imm8) - - #undef _mm256_shuffle_i64x2 - #undef _mm256_maskz_shuffle_i64x2 - #undef _mm256_mask_shuffle_i64x2 - #define _mm256_shuffle_i64x2(a, b, imm8) simde_mm256_shuffle_i64x2(a, b, imm8) - #define _mm256_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm256_maskz_shuffle_i64x2(k, a, b, imm8) - #define _mm256_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm256_mask_shuffle_i64x2(src, k, a, b, imm8) - - #undef _mm256_shuffle_f64x2 - #undef _mm256_maskz_shuffle_f64x2 - #undef _mm256_mask_shuffle_f64x2 - #define _mm256_shuffle_f64x2(a, b, imm8) simde_mm256_shuffle_f64x2(a, b, imm8) - #define _mm256_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm256_maskz_shuffle_f64x2(k, a, b, imm8) - #define _mm256_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm256_mask_shuffle_f64x2(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_shuffle_i32x4 (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - r_.m128i[0] = a_.m128i[ imm8 & 3]; - r_.m128i[1] = a_.m128i[(imm8 >> 2) & 3]; - r_.m128i[2] = b_.m128i[(imm8 >> 4) & 3]; - r_.m128i[3] = b_.m128i[(imm8 >> 6) & 3]; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define 
simde_mm512_shuffle_i32x4(a, b, imm8) _mm512_shuffle_i32x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_shuffle_i32x4 - #define _mm512_shuffle_i32x4(a, b, imm8) simde_mm512_shuffle_i32x4(a, b, imm8) -#endif - -#define simde_mm512_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm512_maskz_mov_epi32(k, simde_mm512_shuffle_i32x4(a, b, imm8)) -#define simde_mm512_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm512_mask_mov_epi32(src, k, simde_mm512_shuffle_i32x4(a, b, imm8)) - -#define simde_mm512_shuffle_f32x4(a, b, imm8) simde_mm512_castsi512_ps(simde_mm512_shuffle_i32x4(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b), imm8)) -#define simde_mm512_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm512_maskz_mov_ps(k, simde_mm512_shuffle_f32x4(a, b, imm8)) -#define simde_mm512_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm512_mask_mov_ps(src, k, simde_mm512_shuffle_f32x4(a, b, imm8)) - -#define simde_mm512_shuffle_i64x2(a, b, imm8) simde_mm512_shuffle_i32x4(a, b, imm8) -#define simde_mm512_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm512_maskz_mov_epi64(k, simde_mm512_shuffle_i64x2(a, b, imm8)) -#define simde_mm512_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm512_mask_mov_epi64(src, k, simde_mm512_shuffle_i64x2(a, b, imm8)) - -#define simde_mm512_shuffle_f64x2(a, b, imm8) simde_mm512_castsi512_pd(simde_mm512_shuffle_i64x2(simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b), imm8)) -#define simde_mm512_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_shuffle_f64x2(a, b, imm8)) -#define simde_mm512_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm512_mask_mov_pd(src, k, simde_mm512_shuffle_f64x2(a, b, imm8)) - -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_shuffle_i32x4 - #undef _mm512_mask_shuffle_i32x4 - #define _mm512_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm512_maskz_shuffle_i32x4(k, a, b, imm8) - #define _mm512_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm512_mask_shuffle_i32x4(src, k, a, b, imm8) - - #undef _mm512_shuffle_f32x4 - #undef _mm512_maskz_shuffle_f32x4 - #undef _mm512_mask_shuffle_f32x4 - #define _mm512_shuffle_f32x4(a, b, imm8) simde_mm512_shuffle_f32x4(a, b, imm8) - #define _mm512_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm512_maskz_shuffle_f32x4(k, a, b, imm8) - #define _mm512_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm512_mask_shuffle_f32x4(src, k, a, b, imm8) - - #undef _mm512_shuffle_i64x2 - #undef _mm512_maskz_shuffle_i64x2 - #undef _mm512_mask_shuffle_i64x2 - #define _mm512_shuffle_i64x2(a, b, imm8) simde_mm512_shuffle_i64x2(a, b, imm8) - #define _mm512_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm512_maskz_shuffle_i64x2(k, a, b, imm8) - #define _mm512_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm512_mask_shuffle_i64x2(src, k, a, b, imm8) - - #undef _mm512_shuffle_f64x2 - #undef _mm512_maskz_shuffle_f64x2 - #undef _mm512_mask_shuffle_f64x2 - #define _mm512_shuffle_f64x2(a, b, imm8) simde_mm512_shuffle_f64x2(a, b, imm8) - #define _mm512_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm512_maskz_shuffle_f64x2(k, a, b, imm8) - #define _mm512_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm512_mask_shuffle_f64x2(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_shuffle_ps(a, b, imm8) _mm512_shuffle_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_shuffle_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512_private \ - simde_mm512_shuffle_ps_a_ = simde__m512_to_private(a), 
\ - simde_mm512_shuffle_ps_b_ = simde__m512_to_private(b); \ - \ - simde_mm512_shuffle_ps_a_.m256[0] = simde_mm256_shuffle_ps(simde_mm512_shuffle_ps_a_.m256[0], simde_mm512_shuffle_ps_b_.m256[0], imm8); \ - simde_mm512_shuffle_ps_a_.m256[1] = simde_mm256_shuffle_ps(simde_mm512_shuffle_ps_a_.m256[1], simde_mm512_shuffle_ps_b_.m256[1], imm8); \ - \ - simde__m512_from_private(simde_mm512_shuffle_ps_a_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_shuffle_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512_private \ - simde_mm512_shuffle_ps_a_ = simde__m512_to_private(a), \ - simde_mm512_shuffle_ps_b_ = simde__m512_to_private(b); \ - \ - simde_mm512_shuffle_ps_a_.f32 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 32, 64, \ - simde_mm512_shuffle_ps_a_.f32, \ - simde_mm512_shuffle_ps_b_.f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 16, \ - (((imm8) >> 6) & 3) + 16, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 20, \ - (((imm8) >> 6) & 3) + 20, \ - (((imm8) ) & 3) + 8, \ - (((imm8) >> 2) & 3) + 8, \ - (((imm8) >> 4) & 3) + 24, \ - (((imm8) >> 6) & 3) + 24, \ - (((imm8) ) & 3) + 12, \ - (((imm8) >> 2) & 3) + 12, \ - (((imm8) >> 4) & 3) + 28, \ - (((imm8) >> 6) & 3) + 28 \ - ); \ - \ - simde__m512_from_private(simde_mm512_shuffle_ps_a_); \ - })) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512 - simde_mm512_shuffle_ps(simde__m512 a, simde__m512 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - const size_t halfway = (sizeof(r_.m128_private[0].f32) / sizeof(r_.m128_private[0].f32[0]) / 2); - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - SIMDE_VECTORIZE - for (size_t j = 0 ; j < halfway ; j++) { - r_.m128_private[i].f32[j] = a_.m128_private[i].f32[(imm8 >> (j * 2)) & 3]; - r_.m128_private[i].f32[halfway + j] = b_.m128_private[i].f32[(imm8 >> ((halfway + j) * 2)) & 3]; - } - } - - return simde__m512_from_private(r_); - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_shuffle_ps - #define _mm512_shuffle_ps(a, b, imm8) simde_mm512_shuffle_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_shuffle_pd(simde__m512d a, simde__m512d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_.f64) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[i * 2] = (imm8 & ( 1 << (i*2) )) ? a_.f64[i * 2 + 1]: a_.f64[i * 2]; - r_.f64[i * 2 + 1] = (imm8 & ( 1 << (i*2+1) )) ? 
b_.f64[i * 2 + 1]: b_.f64[i * 2]; - } - - return simde__m512d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_shuffle_pd(a, b, imm8) _mm512_shuffle_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_shuffle_pd - #define _mm512_shuffle_pd(a, b, imm8) simde_mm512_shuffle_pd(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512BW_NATIVE) -# define simde_mm512_shufflehi_epi16(a, imm8) _mm512_shufflehi_epi16(a, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) -# define simde_mm512_shufflehi_epi16(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512i_private simde_mm512_shufflehi_epi16_r_, \ - simde_mm512_shufflehi_epi16_a_ = simde__m512i_to_private((a)); \ - simde_mm512_shufflehi_epi16_r_.m128i[0] = simde_mm_shufflehi_epi16( \ - simde_mm512_shufflehi_epi16_a_.m128i[0], (imm8)); \ - simde_mm512_shufflehi_epi16_r_.m128i[1] = simde_mm_shufflehi_epi16( \ - simde_mm512_shufflehi_epi16_a_.m128i[1], (imm8)); \ - simde_mm512_shufflehi_epi16_r_.m128i[2] = simde_mm_shufflehi_epi16( \ - simde_mm512_shufflehi_epi16_a_.m128i[2], (imm8)); \ - simde_mm512_shufflehi_epi16_r_.m128i[3] = simde_mm_shufflehi_epi16( \ - simde_mm512_shufflehi_epi16_a_.m128i[3], (imm8)); \ - simde__m512i_from_private(simde_mm512_shufflehi_epi16_r_); \ - })) -#else -# define simde_mm512_shufflehi_epi16(a, imm8) \ - simde_x_mm512_set_m128i( \ - simde_mm_shufflehi_epi16(simde_mm512_extracti32x4_epi32((a), 3), (imm8)), \ - simde_mm_shufflehi_epi16(simde_mm512_extracti32x4_epi32((a), 2), (imm8)), \ - simde_mm_shufflehi_epi16(simde_mm512_extracti32x4_epi32((a), 1), (imm8)), \ - simde_mm_shufflehi_epi16(simde_mm512_extracti32x4_epi32((a), 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_shufflehi_epi16 - #define _mm512_shufflehi_epi16(a, imm8) simde_mm512_shufflehi_epi16(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512BW_NATIVE) -# define simde_mm512_shufflelo_epi16(a, imm8) _mm512_shufflelo_epi16(a, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) -# define simde_mm512_shufflelo_epi16(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512i_private simde_mm512_shufflelo_epi16_r_, \ - simde_mm512_shufflelo_epi16_a_ = simde__m512i_to_private((a)); \ - simde_mm512_shufflelo_epi16_r_.m128i[0] = simde_mm_shufflelo_epi16( \ - simde_mm512_shufflelo_epi16_a_.m128i[0], (imm8)); \ - simde_mm512_shufflelo_epi16_r_.m128i[1] = simde_mm_shufflelo_epi16( \ - simde_mm512_shufflelo_epi16_a_.m128i[1], (imm8)); \ - simde_mm512_shufflelo_epi16_r_.m128i[2] = simde_mm_shufflelo_epi16( \ - simde_mm512_shufflelo_epi16_a_.m128i[2], (imm8)); \ - simde_mm512_shufflelo_epi16_r_.m128i[3] = simde_mm_shufflelo_epi16( \ - simde_mm512_shufflelo_epi16_a_.m128i[3], (imm8)); \ - simde__m512i_from_private(simde_mm512_shufflelo_epi16_r_); \ - })) -#else -# define simde_mm512_shufflelo_epi16(a, imm8) \ - simde_x_mm512_set_m128i( \ - simde_mm_shufflelo_epi16(simde_mm512_extracti32x4_epi32((a), 3), (imm8)), \ - simde_mm_shufflelo_epi16(simde_mm512_extracti32x4_epi32((a), 2), (imm8)), \ - simde_mm_shufflelo_epi16(simde_mm512_extracti32x4_epi32((a), 1), (imm8)), \ - simde_mm_shufflelo_epi16(simde_mm512_extracti32x4_epi32((a), 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_shufflelo_epi16 - #define _mm512_shufflelo_epi16(a, imm8) simde_mm512_shufflelo_epi16(a, imm8) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SHUFFLE_H) */ -/* :: End simde/x86/avx512/shuffle.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH 
-SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_dbsad_epu8(a, b, imm8) _mm_dbsad_epu8((a), (b), (imm8)) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128i - simde_mm_dbsad_epu8_internal_ (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - uint8_t a1 SIMDE_VECTOR(16) = - SIMDE_SHUFFLE_VECTOR_( - 8, 16, a_.u8, a_.u8, - 0, 1, 0, 1, - 4, 5, 4, 5, - 8, 9, 8, 9, - 12, 13, 12, 13); - uint8_t b1 SIMDE_VECTOR(16) = - SIMDE_SHUFFLE_VECTOR_( - 8, 16, b_.u8, b_.u8, - 0, 1, 1, 2, - 2, 3, 3, 4, - 8, 9, 9, 10, - 10, 11, 11, 12); - - __typeof__(r_.u8) abd1_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd1_mask), a1 < b1); - __typeof__(r_.u8) abd1 = (((b1 - a1) & abd1_mask) | ((a1 - b1) & ~abd1_mask)); - - r_.u16 = - __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 0, 2, 4, 6, 8, 10, 12, 14), __typeof__(r_.u16)) + - __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 1, 3, 5, 7, 9, 11, 13, 15), __typeof__(r_.u16)); - - uint8_t a2 SIMDE_VECTOR(16) = - SIMDE_SHUFFLE_VECTOR_( - 8, 16, a_.u8, a_.u8, - 2, 3, 2, 3, - 6, 7, 6, 7, - 10, 11, 10, 11, - 14, 15, 14, 15); - uint8_t b2 SIMDE_VECTOR(16) = - SIMDE_SHUFFLE_VECTOR_( - 8, 16, b_.u8, b_.u8, - 2, 3, 3, 4, - 4, 5, 5, 6, - 10, 11, 11, 12, - 12, 13, 13, 14); - - __typeof__(r_.u8) abd2_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd2_mask), a2 < b2); - __typeof__(r_.u8) abd2 = (((b2 - a2) & abd2_mask) | ((a2 - b2) & ~abd2_mask)); - - r_.u16 += - __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 0, 2, 4, 6, 8, 10, 12, 14), __typeof__(r_.u16)) + - __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 1, 3, 5, 7, 9, 11, 13, 15), __typeof__(r_.u16)); - #else - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = 0; - for (size_t j = 0 ; j < 4 ; j++) { - uint16_t A = HEDLEY_STATIC_CAST(uint16_t, a_.u8[((i << 1) & 12) + j]); - uint16_t B = HEDLEY_STATIC_CAST(uint16_t, b_.u8[((i & 3) | ((i << 1) & 8)) + j]); - r_.u16[i] += (A < B) ? 
[... deletion of the vendored SIMDE library continues: the remainder of simde/x86/avx512/dbsad.h, followed in full by div.h, dpbf16.h, dpbusd.h, dpbusds.h, expand.h, and flushsubnormal.h, and the opening portion of fixupimm.h. All of these are auto-generated, MIT-licensed SIMDE polyfill headers removed verbatim as part of dropping the bundled simde sources. ...]
-SIMDE_MATH_INFINITY : SIMDE_MATH_INFINITY; - break; - case 7: - r_.f64[i] = SIMDE_FLOAT64_C(-0.0); - break; - case 8: - r_.f64[i] = SIMDE_FLOAT64_C(0.0); - break; - case 9: - r_.f64[i] = SIMDE_FLOAT64_C(-1.0); - break; - case 10: - r_.f64[i] = SIMDE_FLOAT64_C(1.0); - break; - case 11: - r_.f64[i] = SIMDE_FLOAT64_C(0.5); - break; - case 12: - r_.f64[i] = SIMDE_FLOAT64_C(90.0); - break; - case 13: - r_.f64[i] = SIMDE_MATH_PI / 2; - break; - case 14: - r_.f64[i] = SIMDE_MATH_DBL_MAX; - break; - case 15: - r_.f64[i] = -SIMDE_MATH_DBL_MAX; - break; - } - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_fixupimm_pd(a, b, c, imm8) _mm_fixupimm_pd(a, b, c, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_fixupimm_pd - #define _mm_fixupimm_pd(a, b, c, imm8) simde_mm_fixupimm_pd(a, b, c, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_mask_fixupimm_pd(a, k, b, c, imm8) _mm_mask_fixupimm_pd(a, k, b, c, imm8) -#else - #define simde_mm_mask_fixupimm_pd(a, k, b, c, imm8) simde_mm_mask_mov_pd(a, k, simde_mm_fixupimm_pd(a, b, c, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_fixupimm_pd - #define _mm_mask_fixupimm_pd(a, k, b, c, imm8) simde_mm_mask_fixupimm_pd(a, k, b, c, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_maskz_fixupimm_pd(k, a, b, c, imm8) _mm_maskz_fixupimm_pd(k, a, b, c, imm8) -#else - #define simde_mm_maskz_fixupimm_pd(k, a, b, c, imm8) simde_mm_maskz_mov_pd(k, simde_mm_fixupimm_pd(a, b, c, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_fixupimm_pd - #define _mm_maskz_fixupimm_pd(k, a, b, c, imm8) simde_mm_maskz_fixupimm_pd(k, a, b, c, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_fixupimm_pd (simde__m256d a, simde__m256d b, simde__m256i c, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - HEDLEY_STATIC_CAST(void, imm8); - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - s_ = simde__m256d_to_private(simde_x_mm256_flushsubnormal_pd(b)); - simde__m256i_private c_ = simde__m256i_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - int32_t select = 1; - switch (simde_math_fpclassify(s_.f64[i])) { - case SIMDE_MATH_FP_NORMAL: - select = (s_.f64[i] < SIMDE_FLOAT64_C(0.0)) ? 6 : (s_.f64[i] == SIMDE_FLOAT64_C(1.0)) ? 3 : 7; - break; - case SIMDE_MATH_FP_ZERO: - select = 2; - break; - case SIMDE_MATH_FP_NAN: - select = 0; - break; - case SIMDE_MATH_FP_INFINITE: - select = ((s_.f64[i] > SIMDE_FLOAT64_C(0.0)) ? 5 : 4); - break; - } - - switch (((c_.i64[i] >> (select << 2)) & 15)) { - case 0: - r_.f64[i] = a_.f64[i]; - break; - case 1: - r_.f64[i] = b_.f64[i]; - break; - case 2: - r_.f64[i] = SIMDE_MATH_NAN; - break; - case 3: - r_.f64[i] = -SIMDE_MATH_NAN; - break; - case 4: - r_.f64[i] = -SIMDE_MATH_INFINITY; - break; - case 5: - r_.f64[i] = SIMDE_MATH_INFINITY; - break; - case 6: - r_.f64[i] = s_.f64[i] < SIMDE_FLOAT64_C(0.0) ? 
-SIMDE_MATH_INFINITY : SIMDE_MATH_INFINITY; - break; - case 7: - r_.f64[i] = SIMDE_FLOAT64_C(-0.0); - break; - case 8: - r_.f64[i] = SIMDE_FLOAT64_C(0.0); - break; - case 9: - r_.f64[i] = SIMDE_FLOAT64_C(-1.0); - break; - case 10: - r_.f64[i] = SIMDE_FLOAT64_C(1.0); - break; - case 11: - r_.f64[i] = SIMDE_FLOAT64_C(0.5); - break; - case 12: - r_.f64[i] = SIMDE_FLOAT64_C(90.0); - break; - case 13: - r_.f64[i] = SIMDE_MATH_PI / 2; - break; - case 14: - r_.f64[i] = SIMDE_MATH_DBL_MAX; - break; - case 15: - r_.f64[i] = -SIMDE_MATH_DBL_MAX; - break; - } - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_fixupimm_pd(a, b, c, imm8) _mm256_fixupimm_pd(a, b, c, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_fixupimm_pd - #define _mm256_fixupimm_pd(a, b, c, imm8) simde_mm256_fixupimm_pd(a, b, c, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_mask_fixupimm_pd(a, k, b, c, imm8) _mm256_mask_fixupimm_pd(a, k, b, c, imm8) -#else - #define simde_mm256_mask_fixupimm_pd(a, k, b, c, imm8) simde_mm256_mask_mov_pd(a, k, simde_mm256_fixupimm_pd(a, b, c, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_fixupimm_pd - #define _mm256_mask_fixupimm_pd(a, k, b, c, imm8) simde_mm256_mask_fixupimm_pd(a, k, b, c, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_maskz_fixupimm_pd(k, a, b, c, imm8) _mm256_maskz_fixupimm_pd(k, a, b, c, imm8) -#else - #define simde_mm256_maskz_fixupimm_pd(k, a, b, c, imm8) simde_mm256_maskz_mov_pd(k, simde_mm256_fixupimm_pd(a, b, c, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_fixupimm_pd - #define _mm256_maskz_fixupimm_pd(k, a, b, c, imm8) simde_mm256_maskz_fixupimm_pd(k, a, b, c, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_fixupimm_pd (simde__m512d a, simde__m512d b, simde__m512i c, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - HEDLEY_STATIC_CAST(void, imm8); - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b), - s_ = simde__m512d_to_private(simde_x_mm512_flushsubnormal_pd(b)); - simde__m512i_private c_ = simde__m512i_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - int32_t select = 1; - switch (simde_math_fpclassify(s_.f64[i])) { - case SIMDE_MATH_FP_NORMAL: - select = (s_.f64[i] < SIMDE_FLOAT64_C(0.0)) ? 6 : (s_.f64[i] == SIMDE_FLOAT64_C(1.0)) ? 3 : 7; - break; - case SIMDE_MATH_FP_ZERO: - select = 2; - break; - case SIMDE_MATH_FP_NAN: - select = 0; - break; - case SIMDE_MATH_FP_INFINITE: - select = ((s_.f64[i] > SIMDE_FLOAT64_C(0.0)) ? 5 : 4); - break; - } - - switch (((c_.i64[i] >> (select << 2)) & 15)) { - case 0: - r_.f64[i] = a_.f64[i]; - break; - case 1: - r_.f64[i] = b_.f64[i]; - break; - case 2: - r_.f64[i] = SIMDE_MATH_NAN; - break; - case 3: - r_.f64[i] = -SIMDE_MATH_NAN; - break; - case 4: - r_.f64[i] = -SIMDE_MATH_INFINITY; - break; - case 5: - r_.f64[i] = SIMDE_MATH_INFINITY; - break; - case 6: - r_.f64[i] = s_.f64[i] < SIMDE_FLOAT64_C(0.0) ? 
-SIMDE_MATH_INFINITY : SIMDE_MATH_INFINITY; - break; - case 7: - r_.f64[i] = SIMDE_FLOAT64_C(-0.0); - break; - case 8: - r_.f64[i] = SIMDE_FLOAT64_C(0.0); - break; - case 9: - r_.f64[i] = SIMDE_FLOAT64_C(-1.0); - break; - case 10: - r_.f64[i] = SIMDE_FLOAT64_C(1.0); - break; - case 11: - r_.f64[i] = SIMDE_FLOAT64_C(0.5); - break; - case 12: - r_.f64[i] = SIMDE_FLOAT64_C(90.0); - break; - case 13: - r_.f64[i] = SIMDE_MATH_PI / 2; - break; - case 14: - r_.f64[i] = SIMDE_MATH_DBL_MAX; - break; - case 15: - r_.f64[i] = -SIMDE_MATH_DBL_MAX; - break; - } - } - - return simde__m512d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_fixupimm_pd(a, b, c, imm8) _mm512_fixupimm_pd(a, b, c, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_fixupimm_pd - #define _mm512_fixupimm_pd(a, b, c, imm8) simde_mm512_fixupimm_pd(a, b, c, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8) _mm512_mask_fixupimm_pd(a, k, b, c, imm8) -#else - #define simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8) simde_mm512_mask_mov_pd(a, k, simde_mm512_fixupimm_pd(a, b, c, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_fixupimm_pd - #define _mm512_mask_fixupimm_pd(a, k, b, c, imm8) simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8) _mm512_maskz_fixupimm_pd(k, a, b, c, imm8) -#else - #define simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_fixupimm_pd(a, b, c, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_fixupimm_pd - #define _mm512_maskz_fixupimm_pd(k, a, b, c, imm8) simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_fixupimm_sd (simde__m128d a, simde__m128d b, simde__m128i c, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - HEDLEY_STATIC_CAST(void, imm8); - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - s_ = simde__m128d_to_private(simde_x_mm_flushsubnormal_pd(b)); - simde__m128i_private c_ = simde__m128i_to_private(c); - - int32_t select = 1; - switch (simde_math_fpclassify(s_.f64[0])) { - case SIMDE_MATH_FP_NORMAL: - select = (s_.f64[0] < SIMDE_FLOAT64_C(0.0)) ? 6 : (s_.f64[0] == SIMDE_FLOAT64_C(1.0)) ? 3 : 7; - break; - case SIMDE_MATH_FP_ZERO: - select = 2; - break; - case SIMDE_MATH_FP_NAN: - select = 0; - break; - case SIMDE_MATH_FP_INFINITE: - select = ((s_.f64[0] > SIMDE_FLOAT64_C(0.0)) ? 5 : 4); - break; - } - - switch (((c_.i64[0] >> (select << 2)) & 15)) { - case 0: - b_.f64[0] = a_.f64[0]; - break; - case 1: - b_.f64[0] = b_.f64[0]; - break; - case 2: - b_.f64[0] = SIMDE_MATH_NAN; - break; - case 3: - b_.f64[0] = -SIMDE_MATH_NAN; - break; - case 4: - b_.f64[0] = -SIMDE_MATH_INFINITY; - break; - case 5: - b_.f64[0] = SIMDE_MATH_INFINITY; - break; - case 6: - b_.f64[0] = s_.f64[0] < SIMDE_FLOAT64_C(0.0) ? 
-SIMDE_MATH_INFINITY : SIMDE_MATH_INFINITY; - break; - case 7: - b_.f64[0] = SIMDE_FLOAT64_C(-0.0); - break; - case 8: - b_.f64[0] = SIMDE_FLOAT64_C(0.0); - break; - case 9: - b_.f64[0] = SIMDE_FLOAT64_C(-1.0); - break; - case 10: - b_.f64[0] = SIMDE_FLOAT64_C(1.0); - break; - case 11: - b_.f64[0] = SIMDE_FLOAT64_C(0.5); - break; - case 12: - b_.f64[0] = SIMDE_FLOAT64_C(90.0); - break; - case 13: - b_.f64[0] = SIMDE_MATH_PI / 2; - break; - case 14: - b_.f64[0] = SIMDE_MATH_DBL_MAX; - break; - case 15: - b_.f64[0] = -SIMDE_MATH_DBL_MAX; - break; - } - - return simde__m128d_from_private(b_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm_fixupimm_sd(a, b, c, imm8) _mm_fixupimm_sd(a, b, c, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_fixupimm_sd - #define _mm_fixupimm_sd(a, b, c, imm8) simde_mm_fixupimm_sd(a, b, c, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm_mask_fixupimm_sd(a, k, b, c, imm8) _mm_mask_fixupimm_sd(a, k, b, c, imm8) -#else - #define simde_mm_mask_fixupimm_sd(a, k, b, c, imm8) simde_mm_mask_mov_pd(a, ((k) | 2), simde_mm_fixupimm_sd(a, b, c, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_fixupimm_sd - #define _mm_mask_fixupimm_sd(a, k, b, c, imm8) simde_mm_mask_fixupimm_sd(a, k, b, c, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8) _mm_maskz_fixupimm_sd(k, a, b, c, imm8) -#else - #define simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8) simde_mm_maskz_mov_pd(((k) | 2), simde_mm_fixupimm_sd(a, b, c, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_fixupimm_sd - #define _mm_maskz_fixupimm_sd(k, a, b, c, imm8) simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_FIXUPIMM_H) */ -/* :: End simde/x86/avx512/fixupimm.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/fixupimm_round.h :: */ -#if !defined(SIMDE_X86_AVX512_FIXUPIMM_ROUND_H) -#define SIMDE_X86_AVX512_FIXUPIMM_ROUND_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_fixupimm_round_ps(a, b, c, imm8, sae) _mm512_fixupimm_round_ps(a, b, c, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_fixupimm_round_ps(a, b, c, imm8, sae) simde_mm512_fixupimm_ps(a, b, c, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_fixupimm_round_ps(a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512 simde_mm512_fixupimm_round_ps_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_fixupimm_round_ps_envp; \ - int simde_mm512_fixupimm_round_ps_x = feholdexcept(&simde_mm512_fixupimm_round_ps_envp); \ - simde_mm512_fixupimm_round_ps_r = simde_mm512_fixupimm_ps(a, b, c, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_fixupimm_round_ps_x == 0)) \ - fesetenv(&simde_mm512_fixupimm_round_ps_envp); \ - } \ - else { \ - simde_mm512_fixupimm_round_ps_r = simde_mm512_fixupimm_ps(a, b, c, imm8); \ - } \ - \ - 
simde_mm512_fixupimm_round_ps_r; \ - })) - #else - #define simde_mm512_fixupimm_round_ps(a, b, c, imm8, sae) simde_mm512_fixupimm_ps(a, b, c, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512 - simde_mm512_fixupimm_round_ps (simde__m512 a, simde__m512 b, simde__m512i c, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m512 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_fixupimm_ps(a, b, c, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_fixupimm_ps(a, b, c, imm8); - #endif - } - else { - r = simde_mm512_fixupimm_ps(a, b, c, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_fixupimm_round_ps - #define _mm512_fixupimm_round_ps(a, b, c, imm8, sae) simde_mm512_fixupimm_round_ps(a, b, c, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) _mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512 simde_mm512_mask_fixupimm_round_ps_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_mask_fixupimm_round_ps_envp; \ - int simde_mm512_mask_fixupimm_round_ps_x = feholdexcept(&simde_mm512_mask_fixupimm_round_ps_envp); \ - simde_mm512_mask_fixupimm_round_ps_r = simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_mask_fixupimm_round_ps_x == 0)) \ - fesetenv(&simde_mm512_mask_fixupimm_round_ps_envp); \ - } \ - else { \ - simde_mm512_mask_fixupimm_round_ps_r = simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8); \ - } \ - \ - simde_mm512_mask_fixupimm_round_ps_r; \ - })) - #else - #define simde_mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512 - simde_mm512_mask_fixupimm_round_ps (simde__m512 a, simde__mmask16 k, simde__m512 b, simde__m512i c, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m512 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8); - #endif - } - else { - r = simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_fixupimm_round_ps - #define _mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) simde_mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) _mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) 
SIMDE_STATEMENT_EXPR_(({ \ - simde__m512 simde_mm512_maskz_fixupimm_round_ps_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_maskz_fixupimm_round_ps_envp; \ - int simde_mm512_maskz_fixupimm_round_ps_x = feholdexcept(&simde_mm512_maskz_fixupimm_round_ps_envp); \ - simde_mm512_maskz_fixupimm_round_ps_r = simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_maskz_fixupimm_round_ps_x == 0)) \ - fesetenv(&simde_mm512_maskz_fixupimm_round_ps_envp); \ - } \ - else { \ - simde_mm512_maskz_fixupimm_round_ps_r = simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8); \ - } \ - \ - simde_mm512_maskz_fixupimm_round_ps_r; \ - })) - #else - #define simde_mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512 - simde_mm512_maskz_fixupimm_round_ps (simde__mmask16 k, simde__m512 a, simde__m512 b, simde__m512i c, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m512 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8); - #endif - } - else { - r = simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_fixupimm_round_ps - #define _mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) simde_mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_fixupimm_round_pd(a, b, c, imm8, sae) _mm512_fixupimm_round_pd(a, b, c, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_fixupimm_round_pd(a, b, c, imm8, sae) simde_mm512_fixupimm_pd(a, b, c, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_fixupimm_round_pd(a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d simde_mm512_fixupimm_round_pd_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_fixupimm_round_pd_envp; \ - int simde_mm512_fixupimm_round_pd_x = feholdexcept(&simde_mm512_fixupimm_round_pd_envp); \ - simde_mm512_fixupimm_round_pd_r = simde_mm512_fixupimm_pd(a, b, c, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_fixupimm_round_pd_x == 0)) \ - fesetenv(&simde_mm512_fixupimm_round_pd_envp); \ - } \ - else { \ - simde_mm512_fixupimm_round_pd_r = simde_mm512_fixupimm_pd(a, b, c, imm8); \ - } \ - \ - simde_mm512_fixupimm_round_pd_r; \ - })) - #else - #define simde_mm512_fixupimm_round_pd(a, b, c, imm8, sae) simde_mm512_fixupimm_pd(a, b, c, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512d - simde_mm512_fixupimm_round_pd (simde__m512d a, simde__m512d b, simde__m512i c, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m512d r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_fixupimm_pd(a, b, c, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_fixupimm_pd(a, b, c, imm8); - #endif - } - else { - r = simde_mm512_fixupimm_pd(a, b, c, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_fixupimm_round_pd - #define _mm512_fixupimm_round_pd(a, b, 
c, imm8, sae) simde_mm512_fixupimm_round_pd(a, b, c, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) _mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d simde_mm512_mask_fixupimm_round_pd_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_mask_fixupimm_round_pd_envp; \ - int simde_mm512_mask_fixupimm_round_pd_x = feholdexcept(&simde_mm512_mask_fixupimm_round_pd_envp); \ - simde_mm512_mask_fixupimm_round_pd_r = simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_mask_fixupimm_round_pd_x == 0)) \ - fesetenv(&simde_mm512_mask_fixupimm_round_pd_envp); \ - } \ - else { \ - simde_mm512_mask_fixupimm_round_pd_r = simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8); \ - } \ - \ - simde_mm512_mask_fixupimm_round_pd_r; \ - })) - #else - #define simde_mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512d - simde_mm512_mask_fixupimm_round_pd (simde__m512d a, simde__mmask8 k, simde__m512d b, simde__m512i c, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m512d r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8); - #endif - } - else { - r = simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_fixupimm_round_pd - #define _mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) simde_mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) _mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d simde_mm512_maskz_fixupimm_round_pd_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_maskz_fixupimm_round_pd_envp; \ - int simde_mm512_maskz_fixupimm_round_pd_x = feholdexcept(&simde_mm512_maskz_fixupimm_round_pd_envp); \ - simde_mm512_maskz_fixupimm_round_pd_r = simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_maskz_fixupimm_round_pd_x == 0)) \ - fesetenv(&simde_mm512_maskz_fixupimm_round_pd_envp); \ - } \ - else { \ - simde_mm512_maskz_fixupimm_round_pd_r = simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8); \ - } \ - \ - simde_mm512_maskz_fixupimm_round_pd_r; \ - })) - #else - #define simde_mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512d - simde_mm512_maskz_fixupimm_round_pd 
(simde__mmask8 k, simde__m512d a, simde__m512d b, simde__m512i c, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m512d r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8); - #endif - } - else { - r = simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_fixupimm_round_pd - #define _mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) simde_mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm_fixupimm_round_ss(a, b, c, imm8, sae) _mm_fixupimm_round_ss(a, b, c, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_fixupimm_round_ss(a, b, c, imm8, sae) simde_mm_fixupimm_ss(a, b, c, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm_fixupimm_round_ss(a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128 simde_mm_fixupimm_round_ss_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm_fixupimm_round_ss_envp; \ - int simde_mm_fixupimm_round_ss_x = feholdexcept(&simde_mm_fixupimm_round_ss_envp); \ - simde_mm_fixupimm_round_ss_r = simde_mm_fixupimm_ss(a, b, c, imm8); \ - if (HEDLEY_LIKELY(simde_mm_fixupimm_round_ss_x == 0)) \ - fesetenv(&simde_mm_fixupimm_round_ss_envp); \ - } \ - else { \ - simde_mm_fixupimm_round_ss_r = simde_mm_fixupimm_ss(a, b, c, imm8); \ - } \ - \ - simde_mm_fixupimm_round_ss_r; \ - })) - #else - #define simde_mm_fixupimm_round_ss(a, b, c, imm8, sae) simde_mm_fixupimm_ss(a, b, c, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde_mm_fixupimm_round_ss (simde__m128 a, simde__m128 b, simde__m128i c, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m128 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm_fixupimm_ss(a, b, c, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm_fixupimm_ss(a, b, c, imm8); - #endif - } - else { - r = simde_mm_fixupimm_ss(a, b, c, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_fixupimm_round_ss - #define _mm_fixupimm_round_ss(a, b, c, imm8, sae) simde_mm_fixupimm_round_ss(a, b, c, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) _mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) simde_mm_mask_fixupimm_ss(a, k, b, c, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128 simde_mm_mask_fixupimm_round_ss_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm_mask_fixupimm_round_ss_envp; \ - int simde_mm_mask_fixupimm_round_ss_x = feholdexcept(&simde_mm_mask_fixupimm_round_ss_envp); \ - simde_mm_mask_fixupimm_round_ss_r = simde_mm_mask_fixupimm_ss(a, k, b, c, imm8); \ - if (HEDLEY_LIKELY(simde_mm_mask_fixupimm_round_ss_x == 0)) \ - 
fesetenv(&simde_mm_mask_fixupimm_round_ss_envp); \ - } \ - else { \ - simde_mm_mask_fixupimm_round_ss_r = simde_mm_mask_fixupimm_ss(a, k, b, c, imm8); \ - } \ - \ - simde_mm_mask_fixupimm_round_ss_r; \ - })) - #else - #define simde_mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) simde_mm_mask_fixupimm_ss(a, k, b, c, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde_mm_mask_fixupimm_round_ss (simde__m128 a, simde__mmask8 k, simde__m128 b, simde__m128i c, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m128 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm_mask_fixupimm_ss(a, k, b, c, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm_mask_fixupimm_ss(a, k, b, c, imm8); - #endif - } - else { - r = simde_mm_mask_fixupimm_ss(a, k, b, c, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_fixupimm_round_ss - #define _mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) simde_mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) _mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128 simde_mm_maskz_fixupimm_round_ss_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm_maskz_fixupimm_round_ss_envp; \ - int simde_mm_maskz_fixupimm_round_ss_x = feholdexcept(&simde_mm_maskz_fixupimm_round_ss_envp); \ - simde_mm_maskz_fixupimm_round_ss_r = simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8); \ - if (HEDLEY_LIKELY(simde_mm_maskz_fixupimm_round_ss_x == 0)) \ - fesetenv(&simde_mm_maskz_fixupimm_round_ss_envp); \ - } \ - else { \ - simde_mm_maskz_fixupimm_round_ss_r = simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8); \ - } \ - \ - simde_mm_maskz_fixupimm_round_ss_r; \ - })) - #else - #define simde_mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde_mm_maskz_fixupimm_round_ss (simde__mmask8 k, simde__m128 a, simde__m128 b, simde__m128i c, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m128 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8); - #endif - } - else { - r = simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_fixupimm_round_ss - #define _mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) simde_mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm_fixupimm_round_sd(a, b, c, imm8, sae) _mm_fixupimm_round_sd(a, b, c, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_fixupimm_round_sd(a, b, c, imm8, sae) simde_mm_fixupimm_sd(a, b, c, imm8) -#elif 
defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm_fixupimm_round_sd(a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128d simde_mm_fixupimm_round_sd_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm_fixupimm_round_sd_envp; \ - int simde_mm_fixupimm_round_sd_x = feholdexcept(&simde_mm_fixupimm_round_sd_envp); \ - simde_mm_fixupimm_round_sd_r = simde_mm_fixupimm_sd(a, b, c, imm8); \ - if (HEDLEY_LIKELY(simde_mm_fixupimm_round_sd_x == 0)) \ - fesetenv(&simde_mm_fixupimm_round_sd_envp); \ - } \ - else { \ - simde_mm_fixupimm_round_sd_r = simde_mm_fixupimm_sd(a, b, c, imm8); \ - } \ - \ - simde_mm_fixupimm_round_sd_r; \ - })) - #else - #define simde_mm_fixupimm_round_sd(a, b, c, imm8, sae) simde_mm_fixupimm_sd(a, b, c, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde_mm_fixupimm_round_sd (simde__m128d a, simde__m128d b, simde__m128i c, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m128d r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm_fixupimm_sd(a, b, c, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm_fixupimm_sd(a, b, c, imm8); - #endif - } - else { - r = simde_mm_fixupimm_sd(a, b, c, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_fixupimm_round_sd - #define _mm_fixupimm_round_sd(a, b, c, imm8, sae) simde_mm_fixupimm_round_sd(a, b, c, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm_mask_fixupimm_round_sd(a, k, b, c, imm8, sae) _mm_mask_fixupimm_round_sd(a, k, b, c, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_mask_fixupimm_round_sd(a, k, b, c, imm8, sae) simde_mm_mask_fixupimm_sd(a, k, b, c, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm_mask_fixupimm_round_sd(a, k, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128d simde_mm_mask_fixupimm_round_sd_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm_mask_fixupimm_round_sd_envp; \ - int simde_mm_mask_fixupimm_round_sd_x = feholdexcept(&simde_mm_mask_fixupimm_round_sd_envp); \ - simde_mm_mask_fixupimm_round_sd_r = simde_mm_mask_fixupimm_sd(a, k, b, c, imm8); \ - if (HEDLEY_LIKELY(simde_mm_mask_fixupimm_round_sd_x == 0)) \ - fesetenv(&simde_mm_mask_fixupimm_round_sd_envp); \ - } \ - else { \ - simde_mm_mask_fixupimm_round_sd_r = simde_mm_mask_fixupimm_sd(a, k, b, c, imm8); \ - } \ - \ - simde_mm_mask_fixupimm_round_sd_r; \ - })) - #else - #define simde_mm_mask_fixupimm_round_sd(a, k, b, c, imm8, sae) simde_mm_mask_fixupimm_sd(a, k, b, c, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde_mm_mask_fixupimm_round_sd (simde__m128d a, simde__mmask8 k, simde__m128d b, simde__m128i c, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m128d r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm_mask_fixupimm_sd(a, k, b, c, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm_mask_fixupimm_sd(a, k, b, c, imm8); - #endif - } - else { - r = simde_mm_mask_fixupimm_sd(a, k, b, c, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_fixupimm_round_sd - #define _mm_mask_fixupimm_round_sd(a, k, 
b, c, imm8, sae) simde_mm_mask_fixupimm_round_sd(a, k, b, c, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) _mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128d simde_mm_maskz_fixupimm_round_sd_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm_maskz_fixupimm_round_sd_envp; \ - int simde_mm_maskz_fixupimm_round_sd_x = feholdexcept(&simde_mm_maskz_fixupimm_round_sd_envp); \ - simde_mm_maskz_fixupimm_round_sd_r = simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8); \ - if (HEDLEY_LIKELY(simde_mm_maskz_fixupimm_round_sd_x == 0)) \ - fesetenv(&simde_mm_maskz_fixupimm_round_sd_envp); \ - } \ - else { \ - simde_mm_maskz_fixupimm_round_sd_r = simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8); \ - } \ - \ - simde_mm_maskz_fixupimm_round_sd_r; \ - })) - #else - #define simde_mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde_mm_maskz_fixupimm_round_sd (simde__mmask8 k, simde__m128d a, simde__m128d b, simde__m128i c, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m128d r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8); - #endif - } - else { - r = simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_fixupimm_round_sd - #define _mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) simde_mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_FIXUPIMM_ROUND_H) */ -/* :: End simde/x86/avx512/fixupimm_round.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/fmadd.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_FMADD_H) -#define SIMDE_X86_AVX512_FMADD_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/fma.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2019 Evan Nemerson - */ - -#if !defined(SIMDE_X86_FMA_H) -#define SIMDE_X86_FMA_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if !defined(SIMDE_X86_FMA_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) -# define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_fmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm_fmadd_pd(a, b, c); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - c_ = simde__m128d_to_private(c), - r_; - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_madd(a_.altivec_f64, b_.altivec_f64, c_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vfmaq_f64(c_.neon_f64, b_.neon_f64, a_.neon_f64); - #elif defined(simde_math_fma) && (defined(__FP_FAST_FMA) || defined(FP_FAST_FMA)) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fma(a_.f64[i], b_.f64[i], c_.f64[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] * b_.f64[i]) + c_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fmadd_pd - #define _mm_fmadd_pd(a, b, c) simde_mm_fmadd_pd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_fmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm256_fmadd_pd(a, b, c); - #else - return simde_mm256_add_pd(simde_mm256_mul_pd(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm256_fmadd_pd - #define _mm256_fmadd_pd(a, b, c) simde_mm256_fmadd_pd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_fmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm_fmadd_ps(a, b, c); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - c_ = simde__m128_to_private(c), - r_; - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_madd(a_.altivec_f32, b_.altivec_f32, c_.altivec_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - r_.neon_f32 = vfmaq_f32(c_.neon_f32, b_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmlaq_f32(c_.neon_f32, b_.neon_f32, a_.neon_f32); - #elif defined(simde_math_fmaf) && (defined(__FP_FAST_FMAF) || defined(FP_FAST_FMAF)) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fmaf(a_.f32[i], b_.f32[i], c_.f32[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] * b_.f32[i]) + c_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fmadd_ps - #define _mm_fmadd_ps(a, b, c) simde_mm_fmadd_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_fmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm256_fmadd_ps(a, b, c); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) - 
simde__m256_private - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - c_ = simde__m256_to_private(c), - r_; - - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_fmadd_ps(a_.m128[i], b_.m128[i], c_.m128[i]); - } - - return simde__m256_from_private(r_); - #else - return simde_mm256_add_ps(simde_mm256_mul_ps(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm256_fmadd_ps - #define _mm256_fmadd_ps(a, b, c) simde_mm256_fmadd_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_fmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) { - #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) - return _mm_fmadd_sd(a, b, c); - #else - return simde_mm_add_sd(simde_mm_mul_sd(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fmadd_sd - #define _mm_fmadd_sd(a, b, c) simde_mm_fmadd_sd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_fmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) { - #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) - return _mm_fmadd_ss(a, b, c); - #else - return simde_mm_add_ss(simde_mm_mul_ss(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fmadd_ss - #define _mm_fmadd_ss(a, b, c) simde_mm_fmadd_ss(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_fmaddsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm_fmaddsub_pd(a, b, c); - #else - return simde_mm_addsub_pd(simde_mm_mul_pd(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fmaddsub_pd - #define _mm_fmaddsub_pd(a, b, c) simde_mm_fmaddsub_pd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_fmaddsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm256_fmaddsub_pd(a, b, c); - #else - return simde_mm256_addsub_pd(simde_mm256_mul_pd(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm256_fmaddsub_pd - #define _mm256_fmaddsub_pd(a, b, c) simde_mm256_fmaddsub_pd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_fmaddsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm_fmaddsub_ps(a, b, c); - #else - return simde_mm_addsub_ps(simde_mm_mul_ps(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fmaddsub_ps - #define _mm_fmaddsub_ps(a, b, c) simde_mm_fmaddsub_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_fmaddsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm256_fmaddsub_ps(a, b, c); - #else - return simde_mm256_addsub_ps(simde_mm256_mul_ps(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm256_fmaddsub_ps - #define _mm256_fmaddsub_ps(a, b, c) simde_mm256_fmaddsub_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_fmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm_fmsub_pd(a, b, c); - #else - return simde_mm_sub_pd(simde_mm_mul_pd(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fmsub_pd - #define _mm_fmsub_pd(a, b, c) simde_mm_fmsub_pd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_fmsub_pd 
(simde__m256d a, simde__m256d b, simde__m256d c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm256_fmsub_pd(a, b, c); - #else - return simde_mm256_sub_pd(simde_mm256_mul_pd(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm256_fmsub_pd - #define _mm256_fmsub_pd(a, b, c) simde_mm256_fmsub_pd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_fmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm_fmsub_ps(a, b, c); - #else - return simde_mm_sub_ps(simde_mm_mul_ps(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fmsub_ps - #define _mm_fmsub_ps(a, b, c) simde_mm_fmsub_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm256_fmsub_ps(a, b, c); - #else - return simde_mm256_sub_ps(simde_mm256_mul_ps(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm256_fmsub_ps - #define _mm256_fmsub_ps(a, b, c) simde_mm256_fmsub_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_fmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) { - #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) - return _mm_fmsub_sd(a, b, c); - #else - return simde_mm_sub_sd(simde_mm_mul_sd(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fmsub_sd - #define _mm_fmsub_sd(a, b, c) simde_mm_fmsub_sd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_fmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) { - #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) - return _mm_fmsub_ss(a, b, c); - #else - return simde_mm_sub_ss(simde_mm_mul_ss(a, b), c); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fmsub_ss - #define _mm_fmsub_ss(a, b, c) simde_mm_fmsub_ss(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_fmsubadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm_fmsubadd_pd(a, b, c); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - c_ = simde__m128d_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ]; - r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1]; - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fmsubadd_pd - #define _mm_fmsubadd_pd(a, b, c) simde_mm_fmsubadd_pd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_fmsubadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm256_fmsubadd_pd(a, b, c); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - c_ = simde__m256d_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ]; - r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1]; - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm256_fmsubadd_pd - #define _mm256_fmsubadd_pd(a, b, c) simde_mm256_fmsubadd_pd(a, b, c) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_fmsubadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm_fmsubadd_ps(a, b, c); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - c_ = simde__m128_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ]; - r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fmsubadd_ps - #define _mm_fmsubadd_ps(a, b, c) simde_mm_fmsubadd_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_fmsubadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm256_fmsubadd_ps(a, b, c); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - c_ = simde__m256_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ]; - r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1]; - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm256_fmsubadd_ps - #define _mm256_fmsubadd_ps(a, b, c) simde_mm256_fmsubadd_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_fnmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm_fnmadd_pd(a, b, c); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - c_ = simde__m128d_to_private(c); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vfmsq_f64(c_.neon_f64, a_.neon_f64, b_.neon_f64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fnmadd_pd - #define _mm_fnmadd_pd(a, b, c) simde_mm_fnmadd_pd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_fnmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm256_fnmadd_pd(a, b, c); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - c_ = simde__m256d_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm256_fnmadd_pd - #define _mm256_fnmadd_pd(a, b, c) simde_mm256_fnmadd_pd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_fnmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm_fnmadd_ps(a, b, c); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - c_ = simde__m128_to_private(c); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) - r_.neon_f32 = vfmsq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmlsq_f32(c_.neon_f32, a_.neon_f32, 
b_.neon_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fnmadd_ps - #define _mm_fnmadd_ps(a, b, c) simde_mm_fnmadd_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_fnmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm256_fnmadd_ps(a, b, c); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - c_ = simde__m256_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i]; - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm256_fnmadd_ps - #define _mm256_fnmadd_ps(a, b, c) simde_mm256_fnmadd_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_fnmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) { - #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) - return _mm_fnmadd_sd(a, b, c); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - c_ = simde__m128d_to_private(c); - - r_ = a_; - r_.f64[0] = -(a_.f64[0] * b_.f64[0]) + c_.f64[0]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fnmadd_sd - #define _mm_fnmadd_sd(a, b, c) simde_mm_fnmadd_sd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_fnmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) { - #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) - return _mm_fnmadd_ss(a, b, c); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - c_ = simde__m128_to_private(c); - - r_ = a_; - r_.f32[0] = -(a_.f32[0] * b_.f32[0]) + c_.f32[0]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fnmadd_ss - #define _mm_fnmadd_ss(a, b, c) simde_mm_fnmadd_ss(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_fnmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm_fnmsub_pd(a, b, c); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - c_ = simde__m128d_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i]; - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fnmsub_pd - #define _mm_fnmsub_pd(a, b, c) simde_mm_fnmsub_pd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_fnmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm256_fnmsub_pd(a, b, c); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - c_ = simde__m256d_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i]; - } - - return simde__m256d_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm256_fnmsub_pd - #define _mm256_fnmsub_pd(a, b, c) simde_mm256_fnmsub_pd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_fnmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm_fnmsub_ps(a, b, c); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - c_ = simde__m128_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fnmsub_ps - #define _mm_fnmsub_ps(a, b, c) simde_mm_fnmsub_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_fnmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { - #if defined(SIMDE_X86_FMA_NATIVE) - return _mm256_fnmsub_ps(a, b, c); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - c_ = simde__m256_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i]; - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm256_fnmsub_ps - #define _mm256_fnmsub_ps(a, b, c) simde_mm256_fnmsub_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_fnmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) { - #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) - return _mm_fnmsub_sd(a, b, c); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - c_ = simde__m128d_to_private(c); - - r_ = a_; - r_.f64[0] = -(a_.f64[0] * b_.f64[0]) - c_.f64[0]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fnmsub_sd - #define _mm_fnmsub_sd(a, b, c) simde_mm_fnmsub_sd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_fnmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) { - #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) - return _mm_fnmsub_ss(a, b, c); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - c_ = simde__m128_to_private(c); - - r_ = simde__m128_to_private(a); - r_.f32[0] = -(a_.f32[0] * b_.f32[0]) - c_.f32[0]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) - #undef _mm_fnmsub_ss - #define _mm_fnmsub_ss(a, b, c) simde_mm_fnmsub_ss(a, b, c) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_FMA_H) */ -/* :: End simde/x86/fma.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_fmadd_ps (simde__m512 a, simde__m512 b, simde__m512 c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_fmadd_ps(a, b, c); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b), - c_ = simde__m512_to_private(c); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_fmadd_ps(a_.m256[i], b_.m256[i], c_.m256[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = (a_.f32 * 
b_.f32) + c_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] * b_.f32[i]) + c_.f32[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_fmadd_ps - #define _mm512_fmadd_ps(a, b, c) simde_mm512_fmadd_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_fmadd_ps(simde__m512 a, simde__mmask16 k, simde__m512 b, simde__m512 c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_fmadd_ps(a, k, b, c); - #else - return simde_mm512_mask_mov_ps(a, k, simde_mm512_fmadd_ps(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_fmadd_ps - #define _mm512_mask_fmadd_ps(a, k, b, c) simde_mm512_mask_fmadd_ps(a, k, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_fmadd_ps(simde__mmask16 k, simde__m512 a, simde__m512 b, simde__m512 c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_fmadd_ps(k, a, b, c); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_fmadd_ps(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_fmadd_ps - #define _mm512_maskz_fmadd_ps(k, a, b, c) simde_mm512_maskz_fmadd_ps(k, a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_fmadd_pd (simde__m512d a, simde__m512d b, simde__m512d c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_fmadd_pd(a, b, c); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b), - c_ = simde__m512d_to_private(c); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_fmadd_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = (a_.f64 * b_.f64) + c_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] * b_.f64[i]) + c_.f64[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_fmadd_pd - #define _mm512_fmadd_pd(a, b, c) simde_mm512_fmadd_pd(a, b, c) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_FMADD_H) */ -/* :: End simde/x86/avx512/fmadd.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/fmsub.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 kitegi - */ - -#if !defined(SIMDE_X86_AVX512_FMSUB_H) -#define SIMDE_X86_AVX512_FMSUB_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask3_fmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c, simde__mmask8 k) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask3_fmsub_pd(a, b, c, k); - #else - return simde_mm256_mask_mov_pd(c, k, simde_mm256_fmsub_pd(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask3_fmsub_pd - #define _mm256_mask3_fmsub_pd(a, b, c, k) simde_mm256_mask3_fmsub_pd(a, b, c, k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_fmsub_pd (simde__m256d a, simde__mmask8 k, simde__m256d b, simde__m256d c) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_fmsub_pd(a, k, b, c); - #else - return simde_mm256_mask_mov_pd(a, k, simde_mm256_fmsub_pd(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_fmsub_pd - #define _mm256_mask_fmsub_pd(a, k, b, c) simde_mm256_mask_fmsub_pd(a, k, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskz_fmsub_pd (simde__mmask8 k, simde__m256d a, simde__m256d b, simde__m256d c) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_fmsub_pd(k, a, b, c); - #else - return simde_mm256_maskz_mov_pd(k, simde_mm256_fmsub_pd(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_fmsub_pd - #define _mm256_maskz_fmsub_pd(k, a, b, c) simde_mm256_maskz_fmsub_pd(k, a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mask3_fmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c, simde__mmask8 k) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask3_fmsub_pd(a, b, c, k); - #else - return simde_mm_mask_mov_pd(c, k, simde_mm_fmsub_pd(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask3_fmsub_pd - #define _mm_mask3_fmsub_pd(a, b, c, k) simde_mm_mask3_fmsub_pd(a, b, c, k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mask_fmsub_pd (simde__m128d a, simde__mmask8 k, simde__m128d b, simde__m128d c) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_fmsub_pd(a, k, b, c); - #else - return simde_mm_mask_mov_pd(a, k, simde_mm_fmsub_pd(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_fmsub_pd - #define _mm_mask_fmsub_pd(a, k, b, c) simde_mm_mask_fmsub_pd(a, k, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_maskz_fmsub_pd (simde__mmask8 k, simde__m128d a, simde__m128d b, simde__m128d c) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_fmsub_pd(k, a, b, c); - #else - return simde_mm_maskz_mov_pd(k, simde_mm_fmsub_pd(a, b, c)); - #endif -} -#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_fmsub_pd - #define _mm_maskz_fmsub_pd(k, a, b, c) simde_mm_maskz_fmsub_pd(k, a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask3_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c, simde__mmask8 k) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask3_fmsub_ps(a, b, c, k); - #else - return simde_mm256_mask_mov_ps(c, k, simde_mm256_fmsub_ps(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask3_fmsub_ps - #define _mm256_mask3_fmsub_ps(a, b, c, k) simde_mm256_mask3_fmsub_ps(a, b, c, k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_fmsub_ps (simde__m256 a, simde__mmask8 k, simde__m256 b, simde__m256 c) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_fmsub_ps(a, k, b, c); - #else - return simde_mm256_mask_mov_ps(a, k, simde_mm256_fmsub_ps(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_fmsub_ps - #define _mm256_mask_fmsub_ps(a, k, b, c) simde_mm256_mask_fmsub_ps(a, k, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskz_fmsub_ps (simde__mmask8 k, simde__m256 a, simde__m256 b, simde__m256 c) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_fmsub_ps(k, a, b, c); - #else - return simde_mm256_maskz_mov_ps(k, simde_mm256_fmsub_ps(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_fmsub_ps - #define _mm256_maskz_fmsub_ps(k, a, b, c) simde_mm256_maskz_fmsub_ps(k, a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask3_fmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c, simde__mmask8 k) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask3_fmsub_ps(a, b, c, k); - #else - return simde_mm_mask_mov_ps(c, k, simde_mm_fmsub_ps(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask3_fmsub_ps - #define _mm_mask3_fmsub_ps(a, b, c, k) simde_mm_mask3_fmsub_ps(a, b, c, k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_fmsub_ps (simde__m128 a, simde__mmask8 k, simde__m128 b, simde__m128 c) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_fmsub_ps(a, k, b, c); - #else - return simde_mm_mask_mov_ps(a, k, simde_mm_fmsub_ps(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_fmsub_ps - #define _mm_mask_fmsub_ps(a, k, b, c) simde_mm_mask_fmsub_ps(a, k, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskz_fmsub_ps (simde__mmask8 k, simde__m128 a, simde__m128 b, simde__m128 c) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_fmsub_ps(k, a, b, c); - #else - return simde_mm_maskz_mov_ps(k, simde_mm_fmsub_ps(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_fmsub_ps - #define _mm_maskz_fmsub_ps(k, a, b, c) simde_mm_maskz_fmsub_ps(k, a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_fmsub_ps (simde__m512 a, simde__m512 b, simde__m512 c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_fmsub_ps(a, b, c); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b), - c_ = simde__m512_to_private(c); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_fmsub_ps(a_.m256[i], b_.m256[i], c_.m256[i]); - } - #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = (a_.f32 * b_.f32) - c_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] * b_.f32[i]) - c_.f32[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_fmsub_ps - #define _mm512_fmsub_ps(a, b, c) simde_mm512_fmsub_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_fmsub_pd (simde__m512d a, simde__m512d b, simde__m512d c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_fmsub_pd(a, b, c); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b), - c_ = simde__m512d_to_private(c); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_fmsub_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = (a_.f64 * b_.f64) - c_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] * b_.f64[i]) - c_.f64[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_fmsub_pd - #define _mm512_fmsub_pd(a, b, c) simde_mm512_fmsub_pd(a, b, c) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_FMSUB_H) */ -/* :: End simde/x86/avx512/fmsub.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/fnmadd.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 kitegi - */ - -#if !defined(SIMDE_X86_AVX512_FNMADD_H) -#define SIMDE_X86_AVX512_FNMADD_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_fnmadd_ps (simde__m512 a, simde__m512 b, simde__m512 c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_fnmadd_ps(a, b, c); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b), - c_ = simde__m512_to_private(c); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_fnmadd_ps(a_.m256[i], b_.m256[i], c_.m256[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = -(a_.f32 * b_.f32) + c_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_fnmadd_ps - #define _mm512_fnmadd_ps(a, b, c) simde_mm512_fnmadd_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_fnmadd_pd (simde__m512d a, simde__m512d b, simde__m512d c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_fnmadd_pd(a, b, c); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b), - c_ = simde__m512d_to_private(c); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_fnmadd_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = -(a_.f64 * b_.f64) + c_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_fnmadd_pd - #define _mm512_fnmadd_pd(a, b, c) simde_mm512_fnmadd_pd(a, b, c) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_FNMADD_H) */ -/* :: End simde/x86/avx512/fnmadd.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/fnmsub.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 kitegi - */ - -#if !defined(SIMDE_X86_AVX512_FNMSUB_H) -#define SIMDE_X86_AVX512_FNMSUB_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_fnmsub_ps (simde__m512 a, simde__m512 b, simde__m512 c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_fnmsub_ps(a, b, c); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b), - c_ = simde__m512_to_private(c); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_fnmsub_ps(a_.m256[i], b_.m256[i], c_.m256[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = -(a_.f32 * b_.f32) - c_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_fnmsub_ps - #define _mm512_fnmsub_ps(a, b, c) simde_mm512_fnmsub_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_fnmsub_pd (simde__m512d a, simde__m512d b, simde__m512d c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_fnmsub_pd(a, b, c); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b), - c_ = simde__m512d_to_private(c); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_fnmsub_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = -(a_.f64 * b_.f64) - c_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_fnmsub_pd - #define _mm512_fnmsub_pd(a, b, c) simde_mm512_fnmsub_pd(a, b, c) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_FNMSUB_H) */ -/* :: End simde/x86/avx512/fnmsub.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/fpclass.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights 
to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Michael R. Crusoe - */ - -#if !defined(SIMDE_X86_AVX512_FPCLASS_H) -#define SIMDE_X86_AVX512_FPCLASS_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_fpclass_ps_mask(simde__m256 a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 0x88) { - simde__mmask8 r = 0; - simde__m256_private a_ = simde__m256_to_private(a); - - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r |= simde_math_fpclassf(a_.f32[i], imm8) ? (UINT8_C(1) << i) : 0; - } - return r; -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) -# define simde_mm256_fpclass_ps_mask(a, imm8) _mm256_fpclass_ps_mask((a), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) -# undef _mm256_fpclass_ps_mask -# define _mm256_fpclass_ps_mask(a, imm8) simde_mm256_fpclass_ps_mask((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_fpclass_ph_mask(simde__m512h a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 0x88) { - simde__mmask32 r = 0; - simde__m512h_private a_ = simde__m512h_to_private(a); - - for (size_t i = 0 ; i < (sizeof(a_.f16) / sizeof(a_.f16[0])) ; i++) { - r |= simde_fpclasshf(a_.f16[i], imm8) ? (UINT8_C(1) << i) : 0; - } - return r; -} -#if defined(SIMDE_X86_AVX512FP16_NATIVE) -# define simde_mm512_fpclass_ph_mask(a, imm8) _mm512_fpclass_ph_mask((a), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) -# undef _mm512_fpclass_ph_mask -# define _mm512_fpclass_ph_mask(a, imm8) simde_mm512_fpclass_ph_mask((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_fpclass_pd_mask(simde__m512d a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 0x88) { - simde__mmask8 r = 0; - simde__m512d_private a_ = simde__m512d_to_private(a); - - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r |= simde_math_fpclass(a_.f64[i], imm8) ? 
(UINT8_C(1) << i) : 0; - } - return r; -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) -# define simde_mm512_fpclass_pd_mask(a, imm8) _mm512_fpclass_pd_mask((a), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) -# undef _mm512_fpclass_pd_mask -# define _mm512_fpclass_pd_mask(a, imm8) simde_mm512_fpclass_pd_mask((a), (imm8)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_FPCLASS_H) */ -/* :: End simde/x86/avx512/fpclass.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/gather.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Michael R. 
Crusoe - */ - -#if !defined(SIMDE_X86_AVX512_GATHER_H) -#define SIMDE_X86_AVX512_GATHER_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_i32gather_ps(simde__m512i vindex, const void* base_addr, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m512i_private vindex_ = simde__m512i_to_private(vindex); - simde__m512_private r_ = simde__m512_to_private(simde_mm512_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m512_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0)) - #define simde_mm512_i32gather_ps(vindex, base_addr, scale) _mm512_i32gather_ps((vindex), (base_addr), (scale)) -#elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_i32gather_ps(vindex, base_addr, scale) SIMDE_STATEMENT_EXPR_(({\ - simde__m512_private simde_mm512_i32gather_ps_r_; \ - simde__m512i_private simde_mm512_i32gather_ps_vindex_ = simde__m512i_to_private((vindex)); \ - simde_mm512_i32gather_ps_r_.m256[0] = _mm256_i32gather_ps( \ - HEDLEY_STATIC_CAST(float const*, (base_addr)), simde_mm512_i32gather_ps_vindex_.m256i[0], (scale)); \ - simde_mm512_i32gather_ps_r_.m256[1] = _mm256_i32gather_ps( \ - HEDLEY_STATIC_CAST(float const*, (base_addr)), simde_mm512_i32gather_ps_vindex_.m256i[1], (scale)); \ - simde__m512_from_private(simde_mm512_i32gather_ps_r_); \ - })) -#elif defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_i32gather_ps(vindex, base_addr, scale) \ - simde_x_mm512_set_m256( \ - _mm256_i32gather_ps(HEDLEY_STATIC_CAST(float const*, (base_addr)), \ - simde_mm512_extracti32x8_epi32((vindex), 1), (scale)), \ - _mm256_i32gather_ps(HEDLEY_STATIC_CAST(float const*, (base_addr)), \ - simde_mm512_extracti32x8_epi32((vindex), 0), (scale)) ) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_i32gather_ps - #define _mm512_i32gather_ps(vindex, base_addr, scale) simde_mm512_i32gather_ps((vindex), (base_addr), (scale)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_i64gather_epi32(simde__m512i vindex, const void* base_addr, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m512i_private vindex_; - simde__m256i_private r_; - vindex_ = simde__m512i_to_private(vindex); - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src 
= addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_i64gather_epi32(vindex, base_addr, scale) _mm512_i64gather_epi32((vindex), (base_addr), (scale)) -#elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_i64gather_epi32(vindex, base_addr, scale) SIMDE_STATEMENT_EXPR_(({\ - simde__m256i_private simde_mm512_i64gather_epi32_r_; \ - simde__m512i_private simde_mm512_i64gather_epi32_vindex_ = simde__m512i_to_private((vindex)); \ - simde_mm512_i64gather_epi32_r_.m128i[0] = _mm256_i64gather_epi32( \ - HEDLEY_STATIC_CAST(int const*, (base_addr)), simde_mm512_i64gather_epi32_vindex_.m256i[0], (scale)); \ - simde_mm512_i64gather_epi32_r_.m128i[1] = _mm256_i64gather_epi32( \ - HEDLEY_STATIC_CAST(int const*, (base_addr)), simde_mm512_i64gather_epi32_vindex_.m256i[1], (scale)); \ - simde__m256i_from_private(simde_mm512_i64gather_epi32_r_); \ - })) -#elif defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_i64gather_epi32(vindex, base_addr, scale) \ - _mm256_insertf128_si256( \ - _mm256_castsi128_si256( \ - _mm256_i64gather_epi32(HEDLEY_STATIC_CAST(int const*, (base_addr)), \ - simde_mm512_extracti64x4_epi64((vindex), 0), (scale))), \ - _mm256_i64gather_epi32(HEDLEY_STATIC_CAST(int const*, (base_addr)), \ - simde_mm512_extracti64x4_epi64((vindex), 1), (scale)), \ - 1) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_i64gather_epi32 - #define _mm512_i64gather_epi32(vindex, base_addr, scale) simde_mm512_i64gather_epi32((vindex), (base_addr), (scale)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_i64gather_epi32(src, k, vindex, base_addr, scale) _mm512_mask_i64gather_epi32((src), (k), (vindex), (base_addr), (scale)) -#else - #define simde_mm512_mask_i64gather_epi32(src, k, vindex, base_addr, scale) simde_mm256_mask_mov_epi32(src, k, simde_mm512_i64gather_epi32((vindex), (base_addr), (scale))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_i64gather_epi32 - #define _mm512_mask_i64gather_epi32(src, k, vindex, base_addr, scale) simde_mm512_mask_i64gather_epi32((src), (k), (vindex), (base_addr), (scale)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_i64gather_epi64(simde__m512i vindex, const void* base_addr, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m512i_private - vindex_ = simde__m512i_to_private(vindex), - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_i64gather_epi64(vindex, base_addr, scale) _mm512_i64gather_epi64((vindex), (base_addr), (scale)) -#elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_i64gather_epi64(vindex, base_addr, 
scale) SIMDE_STATEMENT_EXPR_(({\ - simde__m512i_private simde_mm512_i64gather_epi64_r_, \ - simde_mm512_i64gather_epi64_vindex_ = simde__m512i_to_private((vindex)); \ - simde_mm512_i64gather_epi64_r_.m256i[0] = _mm256_i64gather_epi64( \ - HEDLEY_STATIC_CAST(long long const*, (base_addr)), simde_mm512_i64gather_epi64_vindex_.m256i[0], (scale)); \ - simde_mm512_i64gather_epi64_r_.m256i[1] = _mm256_i64gather_epi64( \ - HEDLEY_STATIC_CAST(long long const*, (base_addr)), simde_mm512_i64gather_epi64_vindex_.m256i[1], (scale)); \ - simde__m512i_from_private(simde_mm512_i64gather_epi64_r_); \ - })) -#elif defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_i64gather_epi64(vindex, base_addr, scale) \ - simde_x_mm512_set_m256i( \ - _mm256_i64gather_epi64(HEDLEY_STATIC_CAST(long long const*, (base_addr)), \ - simde_mm512_extracti32x8_epi32((vindex), 1), (scale)), \ - _mm256_i64gather_epi64(HEDLEY_STATIC_CAST(long long const*, (base_addr)), \ - simde_mm512_extracti32x8_epi32((vindex), 0), (scale)) ) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_i64gather_epi64 - #define _mm512_i64gather_epi64(vindex, base_addr, scale) simde_mm512_i64gather_epi64(vindex, (base_addr), (scale)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_i64gather_epi64(src, k, vindex, base_addr, scale) _mm512_mask_i64gather_epi64((src), (k), (vindex), (base_addr), (scale)) -#else - #define simde_mm512_mask_i64gather_epi64(src, k, vindex, base_addr, scale) simde_mm512_mask_mov_epi64((src), (k), simde_mm512_i64gather_epi64((vindex), (base_addr), (scale))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_i64gather_epi64 - #define _mm512_mask_i64gather_epi64(src, k, vindex, base_addr, scale) simde_mm512_mask_i64gather_epi64((src), (k), (vindex), (base_addr), (scale)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_i64gather_pd(simde__m512i vindex, const void* base_addr, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m512i_private vindex_; - simde__m512d_private r_; - vindex_ = simde__m512i_to_private(vindex); - r_ = simde__m512d_to_private(simde_mm512_setzero_pd()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f64[i] = dst; - } - - return simde__m512d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_i64gather_pd(vindex, base_addr, scale) _mm512_i64gather_pd((vindex), (base_addr), (scale)) -#elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_i64gather_pd(vindex, base_addr, scale) SIMDE_STATEMENT_EXPR_(({\ - simde__m512d_private simde_mm512_i64gather_pd_r_; \ - simde__m512i_private simde_mm512_i64gather_pd_vindex_ = simde__m512i_to_private((vindex)); \ - simde_mm512_i64gather_pd_r_.m256d[0] = _mm256_i64gather_pd( \ - HEDLEY_STATIC_CAST(double const*, (base_addr)), simde_mm512_i64gather_pd_vindex_.m256i[0], (scale)); \ - simde_mm512_i64gather_pd_r_.m256d[1] = _mm256_i64gather_pd( \ - HEDLEY_STATIC_CAST(double const*, (base_addr)), simde_mm512_i64gather_pd_vindex_.m256i[1], 
(scale)); \ - simde__m512d_from_private(simde_mm512_i64gather_pd_r_); \ - })) -#elif defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_i64gather_pd(vindex, base_addr, scale) \ - simde_x_mm512_set_m256d( \ - _mm256_i64gather_pd(HEDLEY_STATIC_CAST(double const*, (base_addr)), \ - simde_mm512_extracti64x4_epi64((vindex), 1), (scale)), \ - _mm256_i64gather_pd(HEDLEY_STATIC_CAST(double const*, (base_addr)), \ - simde_mm512_extracti64x4_epi64((vindex), 0), (scale)) ) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_i64gather_pd - #define _mm512_i64gather_pd(vindex, base_addr, scale) simde_mm512_i64gather_pd((vindex), (base_addr), (scale)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_i64gather_pd(src, k, vindex, base_addr, scale) _mm512_mask_i64gather_pd((src), (k), (vindex), (base_addr), (scale)) -#else - #define simde_mm512_mask_i64gather_pd(src, k, vindex, base_addr, scale) simde_mm512_mask_mov_pd((src), (k), simde_mm512_i64gather_pd((vindex), (base_addr), (scale))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_i64gather_pd - #define _mm512_mask_i64gather_pd(src, k, vindex, base_addr, scale) simde_mm512_mask_i64gather_pd((src), (k), (vindex), (base_addr), (scale)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm512_i64gather_ps(simde__m512i vindex, const void* base_addr, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m512i_private vindex_; - simde__m256_private r_; - vindex_ = simde__m512i_to_private(vindex); - r_ = simde__m256_to_private(simde_mm256_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_i64gather_ps(vindex, base_addr, scale) _mm512_i64gather_ps((vindex), (base_addr), (scale)) -#elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_i64gather_ps(vindex, base_addr, scale) SIMDE_STATEMENT_EXPR_(({\ - simde__m256_private simde_mm512_i64gather_ps_r_; \ - simde__m512i_private simde_mm512_i64gather_ps_vindex_ = simde__m512i_to_private((vindex)); \ - simde_mm512_i64gather_ps_r_.m128[0] = _mm256_i64gather_ps( \ - HEDLEY_STATIC_CAST(float const*, (base_addr)), simde_mm512_i64gather_ps_vindex_.m256i[0], (scale)); \ - simde_mm512_i64gather_ps_r_.m128[1] = _mm256_i64gather_ps( \ - HEDLEY_STATIC_CAST(float const*, (base_addr)), simde_mm512_i64gather_ps_vindex_.m256i[1], (scale)); \ - simde__m256_from_private(simde_mm512_i64gather_ps_r_); \ - })) -#elif defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_i64gather_ps(vindex, base_addr, scale) \ - _mm256_insertf128_ps( \ - _mm256_castps128_ps256( \ - _mm256_i64gather_ps(HEDLEY_STATIC_CAST(float const*, (base_addr)), \ - simde_mm512_extracti64x4_epi64((vindex), 0), (scale))), \ - _mm256_i64gather_ps(HEDLEY_STATIC_CAST(float const*, (base_addr)), \ - simde_mm512_extracti64x4_epi64((vindex), 1), (scale)), \ - 1) -#endif -#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_i64gather_ps - #define _mm512_i64gather_ps(vindex, base_addr, scale) simde_mm512_i64gather_ps((vindex), (base_addr), (scale)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_i64gather_ps(src, k, vindex, base_addr, scale) _mm512_mask_i64gather_ps((src), (k), (vindex), (base_addr), (scale)) -#else - #define simde_mm512_mask_i64gather_ps(src, k, vindex, base_addr, scale) simde_mm256_mask_mov_ps((src), (k), simde_mm512_i64gather_ps((vindex), (base_addr), (scale))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_i64gather_ps - #define _mm512_mask_i64gather_ps(src, k, vindex, base_addr, scale) simde_mm512_mask_i64gather_ps((src), (k), (vindex), (base_addr), (scale)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_GATHER_H) */ -/* :: End simde/x86/avx512/gather.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/insert.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_INSERT_H) -#define SIMDE_X86_AVX512_INSERT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_insertf32x4 (simde__m512 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - simde__m512 r; - switch(imm8) { - case 0: r = _mm512_insertf32x4(a, b, 0); break; - case 1: r = _mm512_insertf32x4(a, b, 1); break; - case 2: r = _mm512_insertf32x4(a, b, 2); break; - case 3: r = _mm512_insertf32x4(a, b, 3); break; - default: HEDLEY_UNREACHABLE(); r = simde_mm512_setzero_ps(); break; - } - return r; - #else - simde__m512_private a_ = simde__m512_to_private(a); - - a_.m128[imm8 & 3] = b; - - return simde__m512_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_insertf32x4 - #define _mm512_insertf32x4(a, b, imm8) simde_mm512_insertf32x4(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_insertf32x4 (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512 r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) - SIMDE_CONSTIFY_4_(_mm512_mask_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, src, k, a, b); - return r; - #else - SIMDE_CONSTIFY_4_(simde_mm512_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); - return simde_mm512_mask_mov_ps(src, k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_insertf32x4 - #define _mm512_mask_insertf32x4(src, k, a, b, imm8) simde_mm512_mask_insertf32x4(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_insertf32x4 (simde__mmask16 k, simde__m512 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512 r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) - SIMDE_CONSTIFY_4_(_mm512_maskz_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, k, a, b); - return r; - #else - SIMDE_CONSTIFY_4_(simde_mm512_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); - return simde_mm512_maskz_mov_ps(k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_insertf32x4 - #define _mm512_maskz_insertf32x4(k, a, b, imm8) simde_mm512_maskz_insertf32x4(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_insertf64x4 (simde__m512d a, simde__m256d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512d_private a_ = simde__m512d_to_private(a); - - a_.m256d[imm8 & 1] = b; - - return simde__m512d_from_private(a_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_insertf64x4(a, b, imm8) _mm512_insertf64x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_insertf64x4 - #define _mm512_insertf64x4(a, b, imm8) simde_mm512_insertf64x4(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_insertf64x4 (simde__m512d src, 
simde__mmask8 k, simde__m512d a, simde__m256d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512d r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_CONSTIFY_2_(_mm512_mask_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, src, k, a, b); - return r; - #else - SIMDE_CONSTIFY_2_(simde_mm512_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); - return simde_mm512_mask_mov_pd(src, k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_insertf64x4 - #define _mm512_mask_insertf64x4(src, k, a, b, imm8) simde_mm512_mask_insertf64x4(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_insertf64x4 (simde__mmask8 k, simde__m512d a, simde__m256d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512d r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_CONSTIFY_2_(_mm512_maskz_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, k, a, b); - return r; - #else - SIMDE_CONSTIFY_2_(simde_mm512_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); - return simde_mm512_maskz_mov_pd(k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_insertf64x4 - #define _mm512_maskz_insertf64x4(k, a, b, imm8) simde_mm512_maskz_insertf64x4(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_inserti32x4 (simde__m512i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - a_.m128i[imm8 & 3] = b; - - return simde__m512i_from_private(a_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_inserti32x4(a, b, imm8) _mm512_inserti32x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_inserti32x4 - #define _mm512_inserti32x4(a, b, imm8) simde_mm512_inserti32x4(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_inserti32x4 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512i r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) - SIMDE_CONSTIFY_4_(_mm512_mask_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, src, k, a, b); - return r; - #else - SIMDE_CONSTIFY_4_(simde_mm512_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); - return simde_mm512_mask_mov_epi32(src, k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_inserti32x4 - #define _mm512_mask_inserti32x4(src, k, a, b, imm8) simde_mm512_mask_inserti32x4(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_inserti32x4 (simde__mmask16 k, simde__m512i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512i r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) - SIMDE_CONSTIFY_4_(_mm512_maskz_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, k, a, b); - return r; - #else - SIMDE_CONSTIFY_4_(simde_mm512_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); - return simde_mm512_maskz_mov_epi32(k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_inserti32x4 - #define 
_mm512_maskz_inserti32x4(k, a, b, imm8) simde_mm512_maskz_inserti32x4(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_inserti64x4 (simde__m512i a, simde__m256i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - a_.m256i[imm8 & 1] = b; - - return simde__m512i_from_private(a_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_inserti64x4(a, b, imm8) _mm512_inserti64x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_inserti64x4 - #define _mm512_inserti64x4(a, b, imm8) simde_mm512_inserti64x4(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_inserti64x4 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m256i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 2) { - simde__m512i r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_CONSTIFY_2_(_mm512_mask_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, src, k, a, b); - return r; - #else - SIMDE_CONSTIFY_2_(simde_mm512_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); - return simde_mm512_mask_mov_epi64(src, k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_inserti64x4 - #define _mm512_mask_inserti64x4(src, k, a, b, imm8) simde_mm512_mask_inserti64x4(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_inserti64x4 (simde__mmask8 k, simde__m512i a, simde__m256i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 2) { - simde__m512i r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_CONSTIFY_2_(_mm512_maskz_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, k, a, b); - return r; - #else - SIMDE_CONSTIFY_2_(simde_mm512_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); - return simde_mm512_maskz_mov_epi64(k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_inserti64x4 - #define _mm512_maskz_inserti64x4(k, a, b, imm8) simde_mm512_maskz_inserti64x4(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_insertf32x8 (simde__m512 a, simde__m256 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512_private a_ = simde__m512_to_private(a); - - a_.m256[imm8 & 1] = b; - - return simde__m512_from_private(a_); -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_insertf32x8(a, b, imm8) _mm512_insertf32x8(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_insertf32x8 - #define _mm512_insertf32x8(a, b, imm8) simde_mm512_insertf32x8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_insertf32x8(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m256 b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512 r; - SIMDE_CONSTIFY_2_(_mm512_mask_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, src, k, a, b); - return r; - #else - simde__m512 r; - SIMDE_CONSTIFY_2_(simde_mm512_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); - return simde_mm512_mask_mov_ps(src, k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_insertf32x8 - #define _mm512_mask_insertf32x8(src, k, a, b, imm8) simde_mm512_mask_insertf32x8(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 
-simde_mm512_maskz_insertf32x8(simde__mmask16 k, simde__m512 a, simde__m256 b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512 r; - SIMDE_CONSTIFY_2_(_mm512_maskz_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, k, a, b); - return r; - #else - simde__m512 r; - SIMDE_CONSTIFY_2_(simde_mm512_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); - return simde_mm512_maskz_mov_ps(k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_insertf32x8 - #define _mm512_maskz_insertf32x8(k, a, b, imm8) simde_mm512_maskz_insertf32x8(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_insertf64x2 (simde__m512d a, simde__m128d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512d_private a_ = simde__m512d_to_private(a); - - a_.m128d[imm8 & 3] = b; - - return simde__m512d_from_private(a_); -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_insertf64x2(a, b, imm8) _mm512_insertf64x2(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_insertf64x2 - #define _mm512_insertf64x2(a, b, imm8) simde_mm512_insertf64x2(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_insertf64x2(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m128d b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512d r; - SIMDE_CONSTIFY_4_(_mm512_mask_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, src, k, a, b); - return r; - #else - simde__m512d r; - SIMDE_CONSTIFY_4_(simde_mm512_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); - return simde_mm512_mask_mov_pd(src, k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_insertf64x2 - #define _mm512_mask_insertf64x2(src, k, a, b, imm8) simde_mm512_mask_insertf64x2(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_insertf64x2(simde__mmask8 k, simde__m512d a, simde__m128d b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512d r; - SIMDE_CONSTIFY_4_(_mm512_maskz_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, k, a, b); - return r; - #else - simde__m512d r; - SIMDE_CONSTIFY_4_(simde_mm512_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); - return simde_mm512_maskz_mov_pd(k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_insertf64x2 - #define _mm512_maskz_insertf64x2(k, a, b, imm8) simde_mm512_maskz_insertf64x2(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_inserti32x8 (simde__m512i a, simde__m256i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - a_.m256i[imm8 & 1] = b; - - return simde__m512i_from_private(a_); -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_inserti32x8(a, b, imm8) _mm512_inserti32x8(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_inserti32x8 - #define _mm512_inserti32x8(a, b, imm8) simde_mm512_inserti32x8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_inserti32x8(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m256i b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512i r; - SIMDE_CONSTIFY_2_(_mm512_mask_inserti32x8, r, 
(HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, src, k, a, b); - return r; - #else - simde__m512i r; - SIMDE_CONSTIFY_2_(simde_mm512_inserti32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, a, b); - return simde_mm512_mask_mov_epi32(src, k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_inserti32x8 - #define _mm512_mask_inserti32x8(src, k, a, b, imm8) simde_mm512_mask_inserti32x8(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_inserti32x8(simde__mmask16 k, simde__m512i a, simde__m256i b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512i r; - SIMDE_CONSTIFY_2_(_mm512_maskz_inserti32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, k, a, b); - return r; - #else - simde__m512i r; - SIMDE_CONSTIFY_2_(simde_mm512_inserti32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, a, b); - return simde_mm512_maskz_mov_epi32(k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_inserti32x8 - #define _mm512_maskz_inserti32x8(k, a, b, imm8) simde_mm512_maskz_inserti32x8(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_inserti64x2 (simde__m512i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - a_.m128i[imm8 & 3] = b; - - return simde__m512i_from_private(a_); -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_inserti64x2(a, b, imm8) _mm512_inserti64x2(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_inserti64x2 - #define _mm512_inserti64x2(a, b, imm8) simde_mm512_inserti64x2(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_inserti64x2(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m128i b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512i r; - SIMDE_CONSTIFY_4_(_mm512_mask_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, src, k, a, b); - return r; - #else - simde__m512i r; - SIMDE_CONSTIFY_4_(simde_mm512_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); - return simde_mm512_mask_mov_epi64(src, k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_inserti64x2 - #define _mm512_mask_inserti64x2(src, k, a, b, imm8) simde_mm512_mask_inserti64x2(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_inserti64x2(simde__mmask8 k, simde__m512i a, simde__m128i b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512i r; - SIMDE_CONSTIFY_4_(_mm512_maskz_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, k, a, b); - return r; - #else - simde__m512i r; - SIMDE_CONSTIFY_4_(simde_mm512_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); - return simde_mm512_maskz_mov_epi64(k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_inserti64x2 - #define _mm512_maskz_inserti64x2(k, a, b, imm8) simde_mm512_maskz_inserti64x2(k, a, b, imm8) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_INSERT_H) */ -/* :: End simde/x86/avx512/insert.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/kand.h :: */ -/* 
SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Michael R. Crusoe - */ - -#if !defined(SIMDE_X86_AVX512_KAND_H) -#define SIMDE_X86_AVX512_KAND_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_kand (simde__mmask16 a, simde__mmask16 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_kand(a, b); - #else - return a & b; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_kand - #define _mm512_kand(a, b) simde_mm512_kand((a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_KAND_H) */ -/* :: End simde/x86/avx512/kand.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/kshift.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_KSHIFT_H) -#define SIMDE_X86_AVX512_KSHIFT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_kshiftli_mask16 (simde__mmask16 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return HEDLEY_STATIC_CAST(simde__mmask16, (count <= 15) ? (a << count) : 0); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftli_mask16(a, count) _kshiftli_mask16(a, count) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _kshiftli_mask16 - #define _kshiftli_mask16(a, count) simde_kshiftli_mask16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_kshiftli_mask32 (simde__mmask32 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return (count <= 31) ? (a << count) : 0; -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftli_mask32(a, count) _kshiftli_mask32(a, count) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _kshiftli_mask32 - #define _kshiftli_mask32(a, count) simde_kshiftli_mask32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_kshiftli_mask64 (simde__mmask64 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return (count <= 63) ? (a << count) : 0; -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftli_mask64(a, count) _kshiftli_mask64(a, count) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _kshiftli_mask64 - #define _kshiftli_mask64(a, count) simde_kshiftli_mask64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_kshiftli_mask8 (simde__mmask8 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return HEDLEY_STATIC_CAST(simde__mmask8, (count <= 7) ? (a << count) : 0); -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftli_mask8(a, count) _kshiftli_mask8(a, count) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _kshiftli_mask8 - #define _kshiftli_mask8(a, count) simde_kshiftli_mask8(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_kshiftri_mask16 (simde__mmask16 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return HEDLEY_STATIC_CAST(simde__mmask16, (count <= 15) ? 
(a >> count) : 0); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftri_mask16(a, count) _kshiftri_mask16(a, count) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _kshiftri_mask16 - #define _kshiftri_mask16(a, count) simde_kshiftri_mask16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_kshiftri_mask32 (simde__mmask32 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return (count <= 31) ? (a >> count) : 0; -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftri_mask32(a, count) _kshiftri_mask32(a, count) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _kshiftri_mask32 - #define _kshiftri_mask32(a, count) simde_kshiftri_mask32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_kshiftri_mask64 (simde__mmask64 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return (count <= 63) ? (a >> count) : 0; -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftri_mask64(a, count) _kshiftri_mask64(a, count) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _kshiftri_mask64 - #define _kshiftri_mask64(a, count) simde_kshiftri_mask64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_kshiftri_mask8 (simde__mmask8 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return HEDLEY_STATIC_CAST(simde__mmask8, (count <= 7) ? (a >> count) : 0); -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftri_mask8(a, count) _kshiftri_mask8(a, count) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _kshiftri_mask8 - #define _kshiftri_mask8(a, count) simde_kshiftri_mask8(a, count) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_KSHIFT_H) */ -/* :: End simde/x86/avx512/kshift.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/knot.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Michael R. Crusoe - */ - -#if !defined(SIMDE_X86_AVX512_KNOT_H) -#define SIMDE_X86_AVX512_KNOT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_knot_mask8 (simde__mmask8 a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) \ - && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - return _knot_mask8(a); - #else - return ~a; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _knot_mask8 - #define _knot_mask8(a) simde_knot_mask8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_knot_mask16 (simde__mmask16 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) \ - && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - return _knot_mask16(a); - #else - return ~a; - #endif -} -#define simde_mm512_knot(a) simde_knot_mask16(a) -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _knot_mask16 - #undef _mm512_knot - #define _knot_mask16(a) simde_knot_mask16(a) - #define _mm512_knot(a) simde_knot_mask16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_knot_mask32 (simde__mmask32 a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) \ - && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - return _knot_mask32(a); - #else - return ~a; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _knot_mask32 - #define _knot_mask32(a) simde_knot_mask32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_knot_mask64 (simde__mmask64 a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) \ - && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - return _knot_mask64(a); - #else - return ~a; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _knot_mask64 - #define _knot_mask64(a) simde_knot_mask64(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_KNOT_H) */ -/* :: End simde/x86/avx512/knot.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/kxor.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Michael R. Crusoe - */ - -#if !defined(SIMDE_X86_AVX512_KXOR_H) -#define SIMDE_X86_AVX512_KXOR_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_kxor_mask8 (simde__mmask8 a, simde__mmask8 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) \ - && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - return _kxor_mask8(a, b); - #else - return a^b; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _kxor_mask8 - #define _kxor_mask8(a, b) simde_kxor_mask8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_kxor_mask16 (simde__mmask16 a, simde__mmask16 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) \ - && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - return _kxor_mask16(a, b); - #else - return a^b; - #endif -} -#define simde_mm512_kxor(a, b) simde_kxor_mask16(a, b) -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _kxor_mask16 - #undef _mm512_kxor - #define _kxor_mask16(a, b) simde_kxor_mask16(a, b) - #define _mm512_kxor(a, b) simde_kxor_mask16(a, b) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_kxor_mask32 (simde__mmask32 a, simde__mmask32 b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) \ - && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - return _kxor_mask32(a, b); - #else - return a^b; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _kxor_mask32 - #define _kxor_mask32(a, b) simde_kxor_mask32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_kxor_mask64 (simde__mmask64 a, simde__mmask64 b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) \ - && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - return _kxor_mask64(a, b); - #else - return a^b; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _kxor_mask64 - #define _kxor_mask64(a, b) simde_kxor_mask64(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_KXOR_H) */ -/* :: End simde/x86/avx512/kxor.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/lzcnt.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files 
(the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_LZCNT_H) -#define SIMDE_X86_AVX512_LZCNT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#if HEDLEY_MSVC_VERSION_CHECK(14,0,0) -#include -#pragma intrinsic(_BitScanReverse) - #if defined(_M_AMD64) || defined(_M_ARM64) - #pragma intrinsic(_BitScanReverse64) - #endif -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if \ - ( HEDLEY_HAS_BUILTIN(__builtin_clz) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) ) && \ - defined(__INT_MAX__) && defined(__LONG_MAX__) && defined(__LONG_LONG_MAX__) && \ - defined(__INT32_MAX__) && defined(__INT64_MAX__) - #if __INT_MAX__ == __INT32_MAX__ - #define simde_x_clz32(v) __builtin_clz(HEDLEY_STATIC_CAST(unsigned int, (v))) - #elif __LONG_MAX__ == __INT32_MAX__ - #define simde_x_clz32(v) __builtin_clzl(HEDLEY_STATIC_CAST(unsigned long, (v))) - #elif __LONG_LONG_MAX__ == __INT32_MAX__ - #define simde_x_clz32(v) __builtin_clzll(HEDLEY_STATIC_CAST(unsigned long long, (v))) - #endif - - #if __INT_MAX__ == __INT64_MAX__ - #define simde_x_clz64(v) __builtin_clz(HEDLEY_STATIC_CAST(unsigned int, (v))) - #elif __LONG_MAX__ == __INT64_MAX__ - #define simde_x_clz64(v) __builtin_clzl(HEDLEY_STATIC_CAST(unsigned long, (v))) - #elif __LONG_LONG_MAX__ == __INT64_MAX__ - #define simde_x_clz64(v) __builtin_clzll(HEDLEY_STATIC_CAST(unsigned long long, (v))) - #endif -#elif HEDLEY_MSVC_VERSION_CHECK(14,0,0) - static int simde_x_clz32(uint32_t x) { - unsigned long r; - _BitScanReverse(&r, x); - return 31 - HEDLEY_STATIC_CAST(int, r); - } - #define simde_x_clz32 simde_x_clz32 - - static int simde_x_clz64(uint64_t x) { - unsigned long r; - - #if defined(_M_AMD64) || defined(_M_ARM64) - _BitScanReverse64(&r, x); - return 63 - HEDLEY_STATIC_CAST(int, r); - #else - uint32_t high = HEDLEY_STATIC_CAST(uint32_t, x >> 32); - if (high != 0) - return _BitScanReverse(&r, HEDLEY_STATIC_CAST(unsigned long, high)); - else - return _BitScanReverse(&r, HEDLEY_STATIC_CAST(unsigned long, x & ~UINT32_C(0))) + 32; - #endif - } - #define simde_x_clz64 simde_x_clz64 -#endif - -#if !defined(simde_x_clz32) || !defined(simde_x_clz64) - static uint8_t simde_x_avx512cd_lz_lookup(const uint8_t value) { - static const uint8_t lut[256] = { - 7, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - return lut[value]; - }; - - #if !defined(simde_x_clz32) - static int simde_x_clz32(uint32_t x) { - size_t s = sizeof(x) * 8; - uint32_t r; - - while ((s -= 8) != 0) { - r = x >> s; - if (r != 0) - return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, r)) + - (((sizeof(x) - 1) * 8) - s); - } - - if (x == 0) - return (int) ((sizeof(x) * 8) - 1); - else - return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, x)) + - ((sizeof(x) - 1) * 8); - } - #endif - - #if !defined(simde_x_clz64) - static int simde_x_clz64(uint64_t x) { - size_t s = sizeof(x) * 8; - uint64_t r; - - while ((s -= 8) != 0) { - r = x >> s; - if (r != 0) - return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, r)) + - (((sizeof(x) - 1) * 8) - s); - } - - if (x == 0) - return (int) ((sizeof(x) * 8) - 1); - else - return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, x)) + - ((sizeof(x) - 1) * 8); - } - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_lzcnt_epi32(simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm_lzcnt_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://stackoverflow.com/a/58827596/501126 */ - a = _mm_andnot_si128(_mm_srli_epi32(a, 8), a); - a = _mm_castps_si128(_mm_cvtepi32_ps(a)); - a = _mm_srli_epi32(a, 23); - a = _mm_subs_epu16(_mm_set1_epi32(158), a); - a = _mm_min_epi16(a, _mm_set1_epi32(32)); - return a; - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_cntlz(a_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (HEDLEY_UNLIKELY(a_.i32[i] == 0) ? 
HEDLEY_STATIC_CAST(int32_t, sizeof(int32_t) * CHAR_BIT) : HEDLEY_STATIC_CAST(int32_t, simde_x_clz32(HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])))); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) - #undef _mm_lzcnt_epi32 - #define _mm_lzcnt_epi32(a) simde_mm_lzcnt_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_lzcnt_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm_mask_lzcnt_epi32(src, k, a); - #else - return simde_mm_mask_mov_epi32(src, k, simde_mm_lzcnt_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_lzcnt_epi32 - #define _mm_mask_lzcnt_epi32(src, k, a) simde_mm_mask_lzcnt_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_lzcnt_epi32(simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm_maskz_lzcnt_epi32(k, a); - #else - return simde_mm_maskz_mov_epi32(k, simde_mm_lzcnt_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_lzcnt_epi32 - #define _mm_maskz_lzcnt_epi32(k, a) simde_mm_maskz_lzcnt_epi32(k, a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_LZCNT_H) */ -/* :: End simde/x86/avx512/lzcnt.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/madd.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Ashleigh Newman-Jones - */ - -#if !defined(SIMDE_X86_AVX512_MADD_H) -#define SIMDE_X86_AVX512_MADD_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_madd_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_madd_epi16(src, k, a, b); - #else - return simde_mm_mask_mov_epi32(src, k, simde_mm_madd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_madd_epi16 - #define _mm_mask_madd_epi16(src, k, a, b) simde_mm_mask_madd_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_madd_epi16 (simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_madd_epi16(k, a, b); - #else - return simde_mm_maskz_mov_epi32(k, simde_mm_madd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_madd_epi16 - #define _mm_maskz_madd_epi16(k, a, b) simde_mm_maskz_madd_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_madd_epi16 (simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_madd_epi16(src, k, a, b); - #else - return simde_mm256_mask_mov_epi32(src, k, simde_mm256_madd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_madd_epi16 - #define _mm256_mask_madd_epi16(src, k, a, b) simde_mm256_mask_madd_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_madd_epi16 (simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_madd_epi16(k, a, b); - #else - return simde_mm256_maskz_mov_epi32(k, simde_mm256_madd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_madd_epi16 - #define _mm256_maskz_madd_epi16(k, a, b) simde_mm256_maskz_madd_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_madd_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_madd_epi16(a, b); - #else - simde__m512i_private r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if 0 && SIMDE_NATURAL_VECTOR_SIZE_LE(256) || defined(SIMDE_BUG_CLANG_BAD_MADD) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_madd_epi16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = - (HEDLEY_STATIC_CAST(int32_t, a_.i16[ i ]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[ i ])) + - 
(HEDLEY_STATIC_CAST(int32_t, a_.i16[i + 1]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i + 1])); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_madd_epi16 - #define _mm512_madd_epi16(a, b) simde_mm512_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_madd_epi16 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_madd_epi16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_madd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_madd_epi16 - #define _mm512_mask_madd_epi16(src, k, a, b) simde_mm512_mask_madd_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_madd_epi16 (simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_madd_epi16(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_madd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_madd_epi16 - #define _mm512_maskz_madd_epi16(k, a, b) simde_mm512_maskz_madd_epi16(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MADD_H) */ -/* :: End simde/x86/avx512/madd.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/maddubs.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Ashleigh Newman-Jones - */ - -#if !defined(SIMDE_X86_AVX512_MADDUBS_H) -#define SIMDE_X86_AVX512_MADDUBS_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_maddubs_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_maddubs_epi16(src, k, a, b); - #else - return simde_mm_mask_mov_epi16(src, k, simde_mm_maddubs_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_maddubs_epi16 - #define _mm_mask_maddubs_epi16(src, k, a, b) simde_mm_mask_maddubs_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_maddubs_epi16 (simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE ) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_maddubs_epi16(k, a, b); - #else - return simde_mm_maskz_mov_epi16(k, simde_mm_maddubs_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_maddubs_epi16 - #define _mm_maskz_maddubs_epi16(k, a, b) simde_mm_maskz_maddubs_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_maddubs_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_maddubs_epi16(src, k, a, b); - #else - return simde_mm256_mask_mov_epi16(src, k, simde_mm256_maddubs_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_maddubs_epi16 - #define _mm256_mask_maddubs_epi16(src, k, a, b) simde_mm256_mask_maddubs_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_maddubs_epi16 (simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_maddubs_epi16(k, a, b); - #else - return simde_mm256_maskz_mov_epi16(k, simde_mm256_maddubs_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_maddubs_epi16 - #define _mm256_maskz_maddubs_epi16(k, a, b) simde_mm256_maskz_maddubs_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maddubs_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maddubs_epi16(a, b); - #else - simde__m512i_private r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) || defined(SIMDE_BUG_CLANG_BAD_MADD) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_maddubs_epi16(a_.m256i[i], b_.m256i[i]); - } - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 
1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maddubs_epi16 - #define _mm512_maddubs_epi16(a, b) simde_mm512_maddubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_maddubs_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_maddubs_epi16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_maddubs_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_maddubs_epi16 - #define _mm512_mask_maddubs_epi16(src, k, a, b) simde_mm512_mask_maddubs_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_maddubs_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_maddubs_epi16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_maddubs_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_maddubs_epi16 - #define _mm512_maskz_maddubs_epi16(k, a, b) simde_mm512_maskz_maddubs_epi16(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MADDUBS_H) */ -/* :: End simde/x86/avx512/maddubs.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/max.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_MAX_H) -#define SIMDE_X86_AVX512_MAX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_max_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? a_.i8[i] : b_.i8[i]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) -# define _mm512_max_epi8(a, b) simde_mm512_max_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_max_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_max_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epi8 - #define _mm512_mask_max_epi8(src, k, a, b) simde_mm512_mask_max_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_max_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_max_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epi8 - #define _mm512_maskz_max_epi8(k, a, b) simde_mm512_maskz_max_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epu8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_max_epu8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_max_epu8(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_epu8 - #define _mm512_max_epu8(a, b) simde_mm512_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_max_epu8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_max_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epu8 - #define _mm512_mask_max_epu8(src, k, a, b) simde_mm512_mask_max_epu8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_max_epu8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_max_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epu8 - #define _mm512_maskz_max_epu8(k, a, b) simde_mm512_maskz_max_epu8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_max_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) -# define _mm512_max_epi16(a, b) simde_mm512_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_max_epi16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_max_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epi16 - #define _mm512_mask_max_epi16(src, k, a, b) simde_mm512_mask_max_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_max_epi16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_max_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epi16 - #define _mm512_maskz_max_epi16(k, a, b) simde_mm512_maskz_max_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epu16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_max_epu16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_max_epu16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? 
a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_epu16 - #define _mm512_max_epu16(a, b) simde_mm512_max_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_max_epu16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_max_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epu16 - #define _mm512_mask_max_epu16(src, k, a, b) simde_mm512_mask_max_epu16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_max_epu16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_max_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epu16 - #define _mm512_maskz_max_epu16(k, a, b) simde_mm512_maskz_max_epu16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_max_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_max_epi32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_max_epi32(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_epi32 - #define _mm512_max_epi32(a, b) simde_mm512_max_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_max_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_max_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epi32 - #define _mm512_mask_max_epi32(src, k, a, b) simde_mm512_mask_max_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_max_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_max_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epi32 - #define _mm512_maskz_max_epi32(k, a, b) simde_mm512_maskz_max_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epu32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_max_epu32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_max_epu32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_max_epu32(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_epu32 - #define _mm512_max_epu32(a, b) simde_mm512_max_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_max_epu32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_max_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epu32 - #define _mm512_mask_max_epu32(src, k, a, b) simde_mm512_mask_max_epu32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epu32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_max_epu32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_max_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epu32 - #define _mm512_maskz_max_epu32(k, a, b) simde_mm512_maskz_max_epu32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_max_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] > b_.i64[i] ? 
a_.i64[i] : b_.i64[i]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_epi64 - #define _mm512_max_epi64(a, b) simde_mm512_max_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_max_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_max_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epi64 - #define _mm512_mask_max_epi64(src, k, a, b) simde_mm512_mask_max_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_max_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_max_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epi64 - #define _mm512_maskz_max_epi64(k, a, b) simde_mm512_maskz_max_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epu64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_max_epu64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] > b_.u64[i]) ? a_.u64[i] : b_.u64[i]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_epu64 - #define _mm512_max_epu64(a, b) simde_mm512_max_epu64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epu64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_max_epu64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_max_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epu64 - #define _mm512_mask_max_epu64(src, k, a, b) simde_mm512_mask_max_epu64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epu64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_max_epu64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_max_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epu64 - #define _mm512_maskz_max_epu64(k, a, b) simde_mm512_maskz_max_epu64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_max_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_max_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256[0] = simde_mm256_max_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_max_ps(a_.m256[1], b_.m256[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] > b_.f32[i] ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_ps - #define _mm512_max_ps(a, b) simde_mm512_max_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_max_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_max_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_max_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_ps - #define _mm512_mask_max_ps(src, k, a, b) simde_mm512_mask_max_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_max_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_max_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_max_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_ps - #define _mm512_maskz_max_ps(k, a, b) simde_mm512_maskz_max_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_max_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_max_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] > b_.f64[i] ? a_.f64[i] : b_.f64[i]; - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_pd - #define _mm512_max_pd(a, b) simde_mm512_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_max_ph (simde__m512h a, simde__m512h b) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_max_ph(a, b); - #else - simde__m512h_private - r_, - a_ = simde__m512h_to_private(a), - b_ = simde__m512h_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.f16[i] = simde_float16_to_float32(a_.f16[i]) > simde_float16_to_float32(b_.f16[i]) ? 
a_.f16[i] : b_.f16[i]; - } - - return simde__m512h_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_ph - #define _mm512_max_ph(a, b) simde_mm512_max_ph(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_max_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_max_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_max_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_pd - #define _mm512_mask_max_pd(src, k, a, b) simde_mm512_mask_max_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_max_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_max_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_max_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_pd - #define _mm512_maskz_max_pd(k, a, b) simde_mm512_maskz_max_pd(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MAX_H) */ -/* :: End simde/x86/avx512/max.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/min.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_MIN_H) -#define SIMDE_X86_AVX512_MIN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_min_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
a_.i8[i] : b_.i8[i]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) -# define _mm512_min_epi8(a, b) simde_mm512_min_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_min_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_min_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epi8 - #define _mm512_mask_min_epi8(src, k, a, b) simde_mm512_mask_min_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_min_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_min_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epi8 - #define _mm512_maskz_min_epi8(k, a, b) simde_mm512_maskz_min_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epu8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_min_epu8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_min_epu8(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_epu8 - #define _mm512_min_epu8(a, b) simde_mm512_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_min_epu8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_min_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epu8 - #define _mm512_mask_min_epu8(src, k, a, b) simde_mm512_mask_min_epu8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_min_epu8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_min_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epu8 - #define _mm512_maskz_min_epu8(k, a, b) simde_mm512_maskz_min_epu8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_min_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) -# define _mm512_min_epi16(a, b) simde_mm512_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_min_epi16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_min_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epi16 - #define _mm512_mask_min_epi16(src, k, a, b) simde_mm512_mask_min_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_min_epi16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_min_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epi16 - #define _mm512_maskz_min_epi16(k, a, b) simde_mm512_maskz_min_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epu16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_min_epu16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_min_epu16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? 
a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_epu16 - #define _mm512_min_epu16(a, b) simde_mm512_min_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_min_epu16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_min_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epu16 - #define _mm512_mask_min_epu16(src, k, a, b) simde_mm512_mask_min_epu16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_min_epu16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_min_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epu16 - #define _mm512_maskz_min_epu16(k, a, b) simde_mm512_maskz_min_epu16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_min_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_min_epi32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_min_epi32(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] < b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_epi32 - #define _mm512_min_epi32(a, b) simde_mm512_min_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_min_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_min_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epi32 - #define _mm512_mask_min_epi32(src, k, a, b) simde_mm512_mask_min_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i - simde_mm512_maskz_min_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_min_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_min_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epi32 - #define _mm512_maskz_min_epi32(k, a, b) simde_mm512_maskz_min_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epu32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_min_epu32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_min_epu32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_min_epu32(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_epu32 - #define _mm512_min_epu32(a, b) simde_mm512_min_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_min_epu32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_min_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epu32 - #define _mm512_mask_min_epu32(src, k, a, b) simde_mm512_mask_min_epu32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epu32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_min_epu32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_min_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epu32 - #define _mm512_maskz_min_epu32(k, a, b) simde_mm512_maskz_min_epu32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_min_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] < b_.i64[i] ? 
a_.i64[i] : b_.i64[i]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_epi64 - #define _mm512_min_epi64(a, b) simde_mm512_min_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_min_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_min_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epi64 - #define _mm512_mask_min_epi64(src, k, a, b) simde_mm512_mask_min_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_min_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_min_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epi64 - #define _mm512_maskz_min_epi64(k, a, b) simde_mm512_maskz_min_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epu64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_min_epu64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] < b_.u64[i]) ? a_.u64[i] : b_.u64[i]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_epu64 - #define _mm512_min_epu64(a, b) simde_mm512_min_epu64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epu64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_min_epu64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_min_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epu64 - #define _mm512_mask_min_epu64(src, k, a, b) simde_mm512_mask_min_epu64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epu64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_min_epu64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_min_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epu64 - #define _mm512_maskz_min_epu64(k, a, b) simde_mm512_maskz_min_epu64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_min_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_min_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256[0] = simde_mm256_min_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_min_ps(a_.m256[1], b_.m256[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] < b_.f32[i] ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_ps - #define _mm512_min_ps(a, b) simde_mm512_min_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_min_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_min_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_min_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_ps - #define _mm512_mask_min_ps(src, k, a, b) simde_mm512_mask_min_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_min_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_min_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_min_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_ps - #define _mm512_maskz_min_ps(k, a, b) simde_mm512_maskz_min_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_min_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_min_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] < b_.f64[i] ? a_.f64[i] : b_.f64[i]; - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_pd - #define _mm512_min_pd(a, b) simde_mm512_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_min_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_min_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_min_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_pd - #define _mm512_mask_min_pd(src, k, a, b) simde_mm512_mask_min_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_min_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_min_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_min_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_pd - #define _mm512_maskz_min_pd(k, a, b) simde_mm512_maskz_min_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_min_ph (simde__m512h a, simde__m512h b) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_min_ph(a, b); - #else - simde__m512h_private - r_, - a_ = simde__m512h_to_private(a), - b_ = simde__m512h_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.f16[i] = simde_float16_to_float32(a_.f16[i]) < simde_float16_to_float32(b_.f16[i]) ? 
a_.f16[i] : b_.f16[i]; - } - - return simde__m512h_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_ph - #define _mm512_min_ph(a, b) simde_mm512_min_ph(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MIN_H) */ -/* :: End simde/x86/avx512/min.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/mul.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_MUL_H) -#define SIMDE_X86_AVX512_MUL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mul_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mul_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_mul_ps(a_.m256[i], b_.m256[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mul_ps - #define _mm512_mul_ps(a, b) simde_mm512_mul_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_mul_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mul_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_mul_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mul_ps - #define _mm512_mask_mul_ps(src, k, a, b) simde_mm512_mask_mul_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_mul_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mul_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_mul_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mul_ps - #define _mm512_maskz_mul_ps(k, a, b) simde_mm512_maskz_mul_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mul_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mul_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_mul_pd(a_.m256d[i], b_.m256d[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mul_pd - #define _mm512_mul_pd(a, b) simde_mm512_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_mul_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mul_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_mul_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mul_pd - #define _mm512_mask_mul_pd(src, k, a, b) simde_mm512_mask_mul_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_mul_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mul_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_mul_pd(a, b)); - #endif -} 
-#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mul_pd - #define _mm512_maskz_mul_pd(k, a, b) simde_mm512_maskz_mul_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mul_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mul_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_SHUFFLE_VECTOR_) - simde__m512i_private x; - __typeof__(r_.i64) ta, tb; - - /* Get even numbered 32-bit values */ - x.i32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.i32, b_.i32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - /* Cast to 64 bits */ - SIMDE_CONVERT_VECTOR_(ta, x.m256i_private[0].i32); - SIMDE_CONVERT_VECTOR_(tb, x.m256i_private[1].i32); - r_.i64 = ta * tb; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i << 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i << 1]); - } - #endif - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mul_epi32 - #define _mm512_mul_epi32(a, b) simde_mm512_mul_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mul_epi32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mul_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mul_epi32 - #define _mm512_mask_mul_epi32(src, k, a, b) simde_mm512_mask_mul_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mul_epi32(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mul_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mul_epi32 - #define _mm512_maskz_mul_epi32(k, a, b) simde_mm512_maskz_mul_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mul_epu32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mul_epu32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_SHUFFLE_VECTOR_) - simde__m512i_private x; - __typeof__(r_.u64) ta, tb; - - x.u32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.u32, b_.u32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - SIMDE_CONVERT_VECTOR_(ta, x.m256i_private[0].u32); - SIMDE_CONVERT_VECTOR_(tb, x.m256i_private[1].u32); - r_.u64 = ta * tb; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i << 1]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i << 1]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mul_epu32 - #define _mm512_mul_epu32(a, b) simde_mm512_mul_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mul_epu32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return 
_mm512_mask_mul_epu32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mul_epu32 - #define _mm512_mask_mul_epu32(src, k, a, b) simde_mm512_mask_mul_epu32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mul_epu32(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mul_epu32(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mul_epu32 - #define _mm512_maskz_mul_epu32(k, a, b) simde_mm512_maskz_mul_epu32(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MUL_H) */ -/* :: End simde/x86/avx512/mul.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/mulhi.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_MULHI_H) -#define SIMDE_X86_AVX512_MULHI_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mulhi_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mulhi_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mulhi_epi16 - #define _mm512_mulhi_epi16(a, b) simde_mm512_mulhi_epi16(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MULHI_H) */ -/* :: End simde/x86/avx512/mulhi.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/mulhrs.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_MULHRS_H) -#define SIMDE_X86_AVX512_MULHRS_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mulhrs_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mulhrs_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mulhrs_epi16 - #define _mm512_mulhrs_epi16(a, b) simde_mm512_mulhrs_epi16(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MULHRS_H) */ -/* :: End simde/x86/avx512/mulhrs.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/mullo.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_MULLO_H) -#define SIMDE_X86_AVX512_MULLO_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mullo_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mullo_epi16(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]); - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mullo_epi16 - #define _mm512_mullo_epi16(a, b) simde_mm512_mullo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mullo_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mullo_epi32(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]); - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mullo_epi32 - #define _mm512_mullo_epi32(a, b) simde_mm512_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mullo_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mullo_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_mullo_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mullo_epi32 - #define _mm512_mask_mullo_epi32(src, k, a, b) simde_mm512_mask_mullo_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mullo_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mullo_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_mullo_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mullo_epi32 - #define _mm512_maskz_mullo_epi32(k, a, b) simde_mm512_maskz_mullo_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mullo_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mullo_epi64(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] * b_.i64[i]); - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mullo_epi64 - #define _mm512_mullo_epi64(a, b) simde_mm512_mullo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mullo_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if 
defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_mullo_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mullo_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mullo_epi64 - #define _mm512_mask_mullo_epi64(src, k, a, b) simde_mm512_mask_mullo_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mullo_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_mullo_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_mullo_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mullo_epi64 - #define _mm512_maskz_mullo_epi64(k, a, b) simde_mm512_maskz_mullo_epi64(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MULLO_H) */ -/* :: End simde/x86/avx512/mullo.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/multishift.h :: */ -#if !defined(SIMDE_X86_AVX512_MULTISHIFT_H) -#define SIMDE_X86_AVX512_MULTISHIFT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_multishift_epi64_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_multishift_epi64_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u8) / sizeof(r_.u8[0]) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (b_.u64[i / 8] >> (a_.u8[i] & 63)) | (b_.u64[i / 8] << (64 - (a_.u8[i] & 63)))); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_multishift_epi64_epi8 - #define _mm_multishift_epi64_epi8(a, b) simde_mm_multishift_epi64_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_multishift_epi64_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_multishift_epi64_epi8(src, k, a, b); - #else - return simde_mm_mask_mov_epi8(src, k, simde_mm_multishift_epi64_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_multishift_epi64_epi8 - #define _mm_mask_multishift_epi64_epi8(src, k, a, b) simde_mm_mask_multishift_epi64_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_multishift_epi64_epi8 (simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_multishift_epi64_epi8(k, a, b); - #else - return simde_mm_maskz_mov_epi8(k, simde_mm_multishift_epi64_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_multishift_epi64_epi8 - #define _mm_maskz_multishift_epi64_epi8(k, a, b) 
simde_mm_maskz_multishift_epi64_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_multishift_epi64_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_multishift_epi64_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u8) / sizeof(r_.u8[0]) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (b_.u64[i / 8] >> (a_.u8[i] & 63)) | (b_.u64[i / 8] << (64 - (a_.u8[i] & 63)))); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_multishift_epi64_epi8 - #define _mm256_multishift_epi64_epi8(a, b) simde_mm256_multishift_epi64_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_multishift_epi64_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_multishift_epi64_epi8(src, k, a, b); - #else - return simde_mm256_mask_mov_epi8(src, k, simde_mm256_multishift_epi64_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_multishift_epi64_epi8 - #define _mm256_mask_multishift_epi64_epi8(src, k, a, b) simde_mm256_mask_multishift_epi64_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_multishift_epi64_epi8 (simde__mmask32 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_multishift_epi64_epi8(k, a, b); - #else - return simde_mm256_maskz_mov_epi8(k, simde_mm256_multishift_epi64_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_multishift_epi64_epi8 - #define _mm256_maskz_multishift_epi64_epi8(k, a, b) simde_mm256_maskz_multishift_epi64_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_multishift_epi64_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_multishift_epi64_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u8) / sizeof(r_.u8[0]) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (b_.u64[i / 8] >> (a_.u8[i] & 63)) | (b_.u64[i / 8] << (64 - (a_.u8[i] & 63)))); - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef _mm512_multishift_epi64_epi8 - #define _mm512_multishift_epi64_epi8(a, b) simde_mm512_multishift_epi64_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_multishift_epi64_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_mask_multishift_epi64_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_multishift_epi64_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_multishift_epi64_epi8 - #define _mm512_mask_multishift_epi64_epi8(src, k, a, b) simde_mm512_mask_multishift_epi64_epi8(src, k, a, b) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_multishift_epi64_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_maskz_multishift_epi64_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_multishift_epi64_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_multishift_epi64_epi8 - #define _mm512_maskz_multishift_epi64_epi8(k, a, b) simde_mm512_maskz_multishift_epi64_epi8(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MULTISHIFT_H) */ -/* :: End simde/x86/avx512/multishift.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/negate.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_NEGATE_H) -#define SIMDE_X86_AVX512_NEGATE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_x_mm512_negate_ps(simde__m512 a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return simde_mm512_xor_ps(a,_mm512_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_x_mm512_negate_pd(simde__m512d a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return simde_mm512_xor_pd(a, _mm512_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_NEGATE_H) */ -/* :: End simde/x86/avx512/negate.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/or.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_OR_H) -#define SIMDE_X86_AVX512_OR_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_or_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_or_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256[0] = simde_mm256_or_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_or_ps(a_.m256[1], b_.m256[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_or_ps - #define _mm512_or_ps(a, b) simde_mm512_or_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_or_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_or_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_or_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_or_ps - #define _mm512_mask_or_ps(src, k, a, b) simde_mm512_mask_or_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_or_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_or_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_or_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_or_ps - #define _mm512_maskz_or_ps(k, a, b) simde_mm512_maskz_or_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_or_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_or_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256d[0] = simde_mm256_or_pd(a_.m256d[0], b_.m256d[0]); - r_.m256d[1] = simde_mm256_or_pd(a_.m256d[1], b_.m256d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_or_pd - #define _mm512_or_pd(a, b) simde_mm512_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_or_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_or_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_or_pd(a, b)); - #endif -} 
-#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_or_pd - #define _mm512_mask_or_pd(src, k, a, b) simde_mm512_mask_or_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_or_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_or_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_or_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_or_pd - #define _mm512_maskz_or_pd(k, a, b) simde_mm512_maskz_or_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_or_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_or_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 | b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] | b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_or_epi32 - #define _mm512_or_epi32(a, b) simde_mm512_or_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_or_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_or_epi32(src, k, v2, v3); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_or_epi32(v2, v3)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_or_epi32 - #define _mm512_mask_or_epi32(src, k, v2, v3) simde_mm512_mask_or_epi32(src, k, v2, v3) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_or_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_or_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_or_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_or_epi32 - #define _mm512_maskz_or_epi32(k, a, b) simde_mm512_maskz_or_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_or_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_or_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_or_si256(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_or_epi64 - #define _mm512_or_epi64(a, b) simde_mm512_or_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_or_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_or_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_or_epi64(a, b)); - #endif -} -#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_or_epi64 - #define _mm512_mask_or_epi64(src, k, a, b) simde_mm512_mask_or_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_or_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_or_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_or_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_or_epi64 - #define _mm512_maskz_or_epi64(k, a, b) simde_mm512_maskz_or_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_or_si512 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_or_si512(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_or_si256(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_or_si256(a_.m256i[1], b_.m256i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_or_si512 - #define _mm512_or_si512(a, b) simde_mm512_or_si512(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_OR_H) */ -/* :: End simde/x86/avx512/or.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/packs.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_PACKS_H) -#define SIMDE_X86_AVX512_PACKS_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_packs_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_packs_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256i[0] = simde_mm256_packs_epi16(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_packs_epi16(a_.m256i[1], b_.m256i[1]); - #else - const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; - const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; - const size_t octet_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 8; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < octet_point ; i++) { - r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); - r_.i8[i + octet_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i])); - r_.i8[quarter_point + i] = (a_.i16[octet_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[octet_point + i])); - r_.i8[quarter_point + i + octet_point] = (b_.i16[octet_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[octet_point + i])); - r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i])); - r_.i8[halfway_point + i + octet_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i])); - r_.i8[halfway_point + quarter_point + i] = (a_.i16[quarter_point + octet_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + octet_point + i])); - r_.i8[halfway_point + quarter_point + i + octet_point] = (b_.i16[quarter_point + octet_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + octet_point + i] < INT8_MIN) ? 
INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + octet_point + i])); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_packs_epi16 - #define _mm512_packs_epi16(a, b) simde_mm512_packs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_packs_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_packs_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256i[0] = simde_mm256_packs_epi32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_packs_epi32(a_.m256i[1], b_.m256i[1]); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - const size_t octet_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 8; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < octet_point ; i++) { - r_.i16[i] = (a_.i32[i] > INT16_MAX) ? INT16_MAX : ((a_.i32[i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[i])); - r_.i16[i + octet_point] = (b_.i32[i] > INT16_MAX) ? INT16_MAX : ((b_.i32[i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[i])); - r_.i16[quarter_point + i] = (a_.i32[octet_point + i] > INT16_MAX) ? INT16_MAX : ((a_.i32[octet_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[octet_point + i])); - r_.i16[quarter_point + i + octet_point] = (b_.i32[octet_point + i] > INT16_MAX) ? INT16_MAX : ((b_.i32[octet_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[octet_point + i])); - r_.i16[halfway_point + i] = (a_.i32[quarter_point + i] > INT16_MAX) ? INT16_MAX : ((a_.i32[quarter_point +i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[quarter_point + i])); - r_.i16[halfway_point + i + octet_point] = (b_.i32[quarter_point + i] > INT16_MAX) ? INT16_MAX : ((b_.i32[quarter_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[quarter_point +i])); - r_.i16[halfway_point + quarter_point + i] = (a_.i32[quarter_point + octet_point + i] > INT16_MAX) ? INT16_MAX : ((a_.i32[quarter_point + octet_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[quarter_point + octet_point + i])); - r_.i16[halfway_point + quarter_point + i + octet_point] = (b_.i32[quarter_point + octet_point + i] > INT16_MAX) ? INT16_MAX : ((b_.i32[quarter_point + octet_point + i] < INT16_MIN) ? 
INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[quarter_point + octet_point + i])); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_packs_epi32 - #define _mm512_packs_epi32(a, b) simde_mm512_packs_epi32(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_PACKS_H) */ -/* :: End simde/x86/avx512/packs.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/packus.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_PACKUS_H) -#define SIMDE_X86_AVX512_PACKUS_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_packus_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_packus_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256i[0] = simde_mm256_packus_epi16(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_packus_epi16(a_.m256i[1], b_.m256i[1]); - #else - const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; - const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; - const size_t octet_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 8; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < octet_point ; i++) { - r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i])); - r_.u8[i + octet_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i])); - r_.u8[quarter_point + i] = (a_.i16[octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[octet_point + i])); - r_.u8[quarter_point + i + octet_point] = (b_.i16[octet_point + i] > UINT8_MAX) ? 
UINT8_MAX : ((b_.i16[octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[octet_point + i])); - r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i])); - r_.u8[halfway_point + i + octet_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i])); - r_.u8[halfway_point + quarter_point + i] = (a_.i16[quarter_point + octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + octet_point + i])); - r_.u8[halfway_point + quarter_point + i + octet_point] = (b_.i16[quarter_point + octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + octet_point + i])); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_packus_epi16 - #define _mm512_packus_epi16(a, b) simde_mm512_packus_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_packus_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_packus_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_packus_epi32(a_.m256i[i], b_.m256i[i]); - } - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - const size_t octet_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 8; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < octet_point ; i++) { - r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i])); - r_.u16[i + octet_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i])); - r_.u16[quarter_point + i] = (a_.i32[octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[octet_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[octet_point + i])); - r_.u16[quarter_point + i + octet_point] = (b_.i32[octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[octet_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[octet_point + i])); - r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point +i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i])); - r_.u16[halfway_point + i + octet_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point +i])); - r_.u16[halfway_point + quarter_point + i] = (a_.i32[quarter_point + octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + octet_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + octet_point + i])); - r_.u16[halfway_point + quarter_point + i + octet_point] = (b_.i32[quarter_point + octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + octet_point + i] < 0) ? 
UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + octet_point + i])); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_packus_epi32 - #define _mm512_packus_epi32(a, b) simde_mm512_packus_epi32(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_PACKUS_H) */ -/* :: End simde/x86/avx512/packus.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/permutex.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Michael R. Crusoe - */ - -#if !defined(SIMDE_X86_AVX512_PERMUTEX_H) -#define SIMDE_X86_AVX512_PERMUTEX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutex_epi64 (simde__m256i a, const int imm8) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[(imm8 >> (i*2)) & 3]; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_permutex_epi64(a, imm8) _mm256_permutex_epi64((a), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutex_epi64 - #define _mm256_permutex_epi64(a, imm8) simde_mm256_permutex_epi64((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutex_epi64 (simde__m512i a, const int imm8) { - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i_private[0].i64) / sizeof(r_.m256i_private[0].i64[0])) ; i++) { - r_.m256i_private[0].i64[i] = a_.m256i_private[0].i64[(imm8 >> (i*2)) & 3]; - r_.m256i_private[1].i64[i] = a_.m256i_private[1].i64[(imm8 >> (i*2)) & 3]; - } - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_permutex_epi64(a, imm8) _mm512_permutex_epi64((a), (imm8)) -#elif defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_permutex_epi64(a, imm8) SIMDE_STATEMENT_EXPR_(({\ - simde__m512i_private 
simde_mm512_permutex_epi64_a_ = simde__m512i_to_private((a)), simde_mm512_permutex_epi64_r_; \ - simde_mm512_permutex_epi64_r_.m256i[0] = simde_mm256_permutex_epi64(simde_mm512_permutex_epi64_a_.m256i[0], (imm8)); \ - simde_mm512_permutex_epi64_r_.m256i[1] = simde_mm256_permutex_epi64(simde_mm512_permutex_epi64_a_.m256i[1], (imm8)); \ - simde__m512i_from_private(simde_mm512_permutex_epi64_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex_epi64 - #define _mm512_permutex_epi64(a, imm8) simde_mm512_permutex_epi64((a), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_permutex_epi64(src, k, a, imm8) _mm512_mask_permutex_epi64((src), (k), (a), (imm8)) -#else - #define simde_mm512_mask_permutex_epi64(src, k, a, imm8) simde_mm512_mask_mov_epi64((src), (k), simde_mm512_permutex_epi64((a), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex_epi64 - #define _mm512_mask_permutex_epi64(src, k, a, imm8) simde_mm512_mask_permutex_epi64((src), (k), (a), (imm8)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_PERMUTEX_H) */ -/* :: End simde/x86/avx512/permutex.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/permutexvar.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_PERMUTEXVAR_H) -#define SIMDE_X86_AVX512_PERMUTEXVAR_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/slli.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_SLLI_H) -#define SIMDE_X86_AVX512_SLLI_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_slli_epi16 (simde__m512i a, const unsigned int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) - simde__m512i r; - - SIMDE_CONSTIFY_16_(_mm512_slli_epi16, r, simde_mm512_setzero_si512(), imm8, a); - - return r; - #elif defined(SIMDE_X86_AVX512BW_NATIVE) - return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_slli_epi16(a, imm8)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - if(imm8 < 16) - r_.i16 = HEDLEY_STATIC_CAST(__typeof__(r_.i16), (a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8))); - else - return simde_mm512_setzero_si512(); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (imm8 < 16) ? HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)) : 0; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_slli_epi16 - #define _mm512_slli_epi16(a, imm8) simde_mm512_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_slli_epi32 (simde__m512i a, unsigned int imm8) { - /* I guess the restriction was added in 6.4, back-ported to 5.5, then - * removed (fixed) in 7? */ - #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) - simde__m512i r; - - SIMDE_CONSTIFY_32_(_mm512_slli_epi32, r, simde_mm512_setzero_si512(), imm8, a); - - return r; - #elif defined(SIMDE_X86_AVX512F_NATIVE) - return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_slli_epi32(a, imm8)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are - * used. In this case we should do "imm8 &= 0xff". However in - * practice all bits are used. 
*/ - if (imm8 > 31) { - simde_memset(&r_, 0, sizeof(r_)); - } else { - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_slli_epi32(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m256i[1] = simde_mm256_slli_epi32(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_slli_epi32(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[1] = simde_mm_slli_epi32(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[2] = simde_mm_slli_epi32(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[3] = simde_mm_slli_epi32(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << imm8; - } - #endif - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_slli_epi32 - #define _mm512_slli_epi32(a, imm8) simde_mm512_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_slli_epi64 (simde__m512i a, unsigned int imm8) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) - simde__m512i r; - - SIMDE_CONSTIFY_64_(_mm512_slli_epi64, r, simde_mm512_setzero_si512(), imm8, a); - - return r; - #elif defined(SIMDE_X86_AVX512F_NATIVE) - return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_slli_epi64(a, imm8)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are - * used. In this case we should do "imm8 &= 0xff". However in - * practice all bits are used. 
*/ - if (imm8 > 63) { - simde_memset(&r_, 0, sizeof(r_)); - } else { - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_slli_epi64(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m256i[1] = simde_mm256_slli_epi64(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_slli_epi64(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[1] = simde_mm_slli_epi64(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[2] = simde_mm_slli_epi64(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[3] = simde_mm_slli_epi64(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_97248) - r_.u64 = a_.u64 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] << imm8; - } - #endif - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_slli_epi64 - #define _mm512_slli_epi64(a, imm8) simde_mm512_slli_epi64(a, imm8) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SLLI_H) */ -/* :: End simde/x86/avx512/slli.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/srli.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_SRLI_H) -#define SIMDE_X86_AVX512_SRLI_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srli_epi16 (simde__m512i a, const unsigned int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) - simde__m512i r; - - SIMDE_CONSTIFY_16_(_mm512_srli_epi16, r, simde_mm512_setzero_si512(), imm8, a); - - return r; - #elif defined(SIMDE_X86_AVX512BW_NATIVE) - return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_srli_epi16(a, imm8)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) - return simde_mm512_setzero_si512(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> imm8; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_srli_epi16(a, imm8) _mm512_srli_epi16(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_srli_epi16 - #define _mm512_srli_epi16(a, imm8) simde_mm512_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srli_epi32 (simde__m512i a, unsigned int imm8) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) - simde__m512i r; - - SIMDE_CONSTIFY_32_(_mm512_srli_epi32, r, simde_mm512_setzero_si512(), imm8, a); - - return r; - #elif defined(SIMDE_X86_AVX512F_NATIVE) - return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_srli_epi32(a, imm8)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_srli_epi32(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m256i[1] = simde_mm256_srli_epi32(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_srli_epi32(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[1] = simde_mm_srli_epi32(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[2] = simde_mm_srli_epi32(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[3] = simde_mm_srli_epi32(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); - #else - if (imm8 > 31) { - simde_memset(&r_, 0, sizeof(r_)); - } else { - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> imm8; - } - #endif - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_srli_epi32 - #define _mm512_srli_epi32(a, imm8) simde_mm512_srli_epi32(a, 
imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srli_epi64 (simde__m512i a, unsigned int imm8) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) - simde__m512i r; - - SIMDE_CONSTIFY_64_(_mm512_srli_epi64, r, simde_mm512_setzero_si512(), imm8, a); - - return r; - #elif defined(SIMDE_X86_AVX512F_NATIVE) - return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_srli_epi64(a, imm8)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_srli_epi64(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m256i[1] = simde_mm256_srli_epi64(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_srli_epi64(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[1] = simde_mm_srli_epi64(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[2] = simde_mm_srli_epi64(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[3] = simde_mm_srli_epi64(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); - #else - /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are - * used. In this case we should do "imm8 &= 0xff" here. However in - * practice all bits are used. */ - if (imm8 > 63) { - simde_memset(&r_, 0, sizeof(r_)); - } else { - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_97248) - r_.u64 = a_.u64 >> imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_srli_epi64 - #define _mm512_srli_epi64(a, imm8) simde_mm512_srli_epi64(a, imm8) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SRLI_H) */ -/* :: End simde/x86/avx512/srli.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/test.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - * 2020 Christopher Moore - * 2021 Andrew Rodriguez - */ - -#if !defined(SIMDE_X86_AVX512_TEST_H) -#define SIMDE_X86_AVX512_TEST_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_test_epi32_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_test_epi32_mask(a, b); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - simde__mmask8 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask16, !!(a_.i32[i] & b_.i32[i]) << i); - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_test_epi32_mask -#define _mm256_test_epi32_mask(a, b) simde_mm256_test_epi32_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_mask_test_epi32_mask (simde__mmask8 k1, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_test_epi32_mask(k1, a, b); - #else - return simde_mm256_test_epi32_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_test_epi32_mask - #define _mm256_mask_test_epi32_mask(k1, a, b) simde_mm256_mask_test_epi32_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_test_epi16_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_test_epi16_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask32 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask32, !!(a_.i16[i] & b_.i16[i]) << i); - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_test_epi16_mask - #define _mm512_test_epi16_mask(a, b) simde_mm512_test_epi16_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_test_epi32_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_test_epi32_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask16 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask16, !!(a_.i32[i] & b_.i32[i]) << i); - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_test_epi32_mask -#define _mm512_test_epi32_mask(a, b) simde_mm512_test_epi32_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_test_epi64_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_test_epi64_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask8 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask8, !!(a_.i64[i] & b_.i64[i]) << i); - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_test_epi64_mask - #define 
_mm512_test_epi64_mask(a, b) simde_mm512_test_epi64_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_test_epi8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_test_epi8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask64, HEDLEY_STATIC_CAST(uint64_t, !!(a_.i8[i] & b_.i8[i])) << i); - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_test_epi8_mask - #define _mm512_test_epi8_mask(a, b) simde_mm512_test_epi8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_mask_test_epi16_mask (simde__mmask32 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_test_epi16_mask(k1, a, b); - #else - return simde_mm512_test_epi16_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_test_epi16_mask - #define _mm512_mask_test_epi16_mask(k1, a, b) simde_mm512_mask_test_epi16_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_test_epi32_mask(k1, a, b); - #else - return simde_mm512_test_epi32_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_test_epi32_mask - #define _mm512_mask_test_epi32_mask(k1, a, b) simde_mm512_mask_test_epi32_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_test_epi64_mask(k1, a, b); - #else - return simde_mm512_test_epi64_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_test_epi64_mask - #define _mm512_mask_test_epi64_mask(k1, a, b) simde_mm512_mask_test_epi64_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_mask_test_epi8_mask (simde__mmask64 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_test_epi8_mask(k1, a, b); - #else - return simde_mm512_test_epi8_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_test_epi8_mask - #define _mm512_mask_test_epi8_mask(k1, a, b) simde_mm512_mask_test_epi8_mask(k1, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_TEST_H) */ -/* :: End simde/x86/avx512/test.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_permutexvar_epi16 (simde__m128i idx, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_permutexvar_epi16(idx, a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - simde__m128i mask16 = simde_mm_set1_epi16(0x0007); - simde__m128i shift16 = simde_mm_set1_epi16(0x0202); - simde__m128i byte_index16 = simde_mm_set1_epi16(0x0100); - simde__m128i index16 = simde_mm_and_si128(idx, mask16); - index16 = simde_mm_mullo_epi16(index16, shift16); - index16 = simde_mm_add_epi16(index16, byte_index16); - return simde_mm_shuffle_epi8(a, 
index16); - #else - simde__m128i_private - idx_ = simde__m128i_to_private(idx), - a_ = simde__m128i_to_private(a), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint16x8_t mask16 = vdupq_n_u16(0x0007); - uint16x8_t byte_index16 = vdupq_n_u16(0x0100); - uint16x8_t index16 = vandq_u16(idx_.neon_u16, mask16); - index16 = vmulq_n_u16(index16, 0x0202); - index16 = vaddq_u16(index16, byte_index16); - r_.neon_u8 = vqtbl1q_u8(a_.neon_u8, vreinterpretq_u8_u16(index16)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16; - index16 = vec_and(idx_.altivec_u16, vec_splat_u16(7)); - index16 = vec_mladd(index16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)), vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100))); - r_.altivec_u8 = vec_perm(a_.altivec_u8, a_.altivec_u8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t mask16 = wasm_i16x8_splat(0x0007); - const v128_t shift16 = wasm_i16x8_splat(0x0202); - const v128_t byte_index16 = wasm_i16x8_splat(0x0100); - v128_t index16 = wasm_v128_and(idx_.wasm_v128, mask16); - index16 = wasm_i16x8_mul(index16, shift16); - index16 = wasm_i16x8_add(index16, byte_index16); - r_.wasm_v128 = wasm_i8x16_swizzle(a_.wasm_v128, index16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[idx_.i16[i] & 0x07]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_permutexvar_epi16 - #define _mm_permutexvar_epi16(idx, a) simde_mm_permutexvar_epi16(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_permutexvar_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i idx, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_permutexvar_epi16(src, k, idx, a); - #else - return simde_mm_mask_mov_epi16(src, k, simde_mm_permutexvar_epi16(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_permutexvar_epi16 - #define _mm_mask_permutexvar_epi16(src, k, idx, a) simde_mm_mask_permutexvar_epi16(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_permutexvar_epi16 (simde__mmask8 k, simde__m128i idx, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_permutexvar_epi16(k, idx, a); - #else - return simde_mm_maskz_mov_epi16(k, simde_mm_permutexvar_epi16(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_permutexvar_epi16 - #define _mm_maskz_permutexvar_epi16(k, idx, a) simde_mm_maskz_permutexvar_epi16(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_permutexvar_epi8 (simde__m128i idx, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_permutexvar_epi8(idx, a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - simde__m128i mask = simde_mm_set1_epi8(0x0F); - simde__m128i index = simde_mm_and_si128(idx, mask); - return simde_mm_shuffle_epi8(a, index); - #else - simde__m128i_private - idx_ = simde__m128i_to_private(idx), - a_ = simde__m128i_to_private(a), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - 
uint8x16_t mask = vdupq_n_u8(0x0F); - uint8x16_t index = vandq_u8(idx_.neon_u8, mask); - r_.neon_u8 = vqtbl1q_u8(a_.neon_u8, index); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_perm(a_.altivec_u8, a_.altivec_u8, idx_.altivec_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t mask = wasm_i8x16_splat(0x0F); - v128_t index = wasm_v128_and(idx_.wasm_v128, mask); - r_.wasm_v128 = wasm_i8x16_swizzle(a_.wasm_v128, index); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[idx_.i8[i] & 0x0F]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_permutexvar_epi8 - #define _mm_permutexvar_epi8(idx, a) simde_mm_permutexvar_epi8(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_permutexvar_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i idx, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_permutexvar_epi8(src, k, idx, a); - #else - return simde_mm_mask_mov_epi8(src, k, simde_mm_permutexvar_epi8(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_permutexvar_epi8 - #define _mm_mask_permutexvar_epi8(src, k, idx, a) simde_mm_mask_permutexvar_epi8(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_permutexvar_epi8 (simde__mmask16 k, simde__m128i idx, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_permutexvar_epi8(k, idx, a); - #else - return simde_mm_maskz_mov_epi8(k, simde_mm_permutexvar_epi8(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_permutexvar_epi8 - #define _mm_maskz_permutexvar_epi8(k, idx, a) simde_mm_maskz_permutexvar_epi8(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutexvar_epi16 (simde__m256i idx, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutexvar_epi16(idx, a); - #elif defined(SIMDE_X86_AVX2_NATIVE) - simde__m256i mask16 = simde_mm256_set1_epi16(0x001F); - simde__m256i shift16 = simde_mm256_set1_epi16(0x0202); - simde__m256i byte_index16 = simde_mm256_set1_epi16(0x0100); - simde__m256i index16 = simde_mm256_and_si256(idx, mask16); - index16 = simde_mm256_mullo_epi16(index16, shift16); - simde__m256i lo = simde_mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - simde__m256i hi = simde_mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - simde__m256i select = simde_mm256_slli_epi64(index16, 3); - index16 = simde_mm256_add_epi16(index16, byte_index16); - lo = simde_mm256_shuffle_epi8(lo, index16); - hi = simde_mm256_shuffle_epi8(hi, index16); - return simde_mm256_blendv_epi8(lo, hi, select); - #else - simde__m256i_private - idx_ = simde__m256i_to_private(idx), - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x16x2_t table = { { a_.m128i_private[0].neon_u8, - a_.m128i_private[1].neon_u8 } }; - uint16x8_t mask16 = vdupq_n_u16(0x000F); - uint16x8_t byte_index16 = vdupq_n_u16(0x0100); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / 
sizeof(r_.m128i_private[0])) ; i++) { - uint16x8_t index16 = vandq_u16(idx_.m128i_private[i].neon_u16, mask16); - index16 = vmulq_n_u16(index16, 0x0202); - index16 = vaddq_u16(index16, byte_index16); - r_.m128i_private[i].neon_u8 = vqtbl2q_u8(table, vreinterpretq_u8_u16(index16)); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16, mask16, shift16, byte_index16; - mask16 = vec_splat_u16(0x000F); - shift16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)); - byte_index16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100)); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - index16 = vec_and(idx_.m128i_private[i].altivec_u16, mask16); - index16 = vec_mladd(index16, shift16, byte_index16); - r_.m128i_private[i].altivec_u8 = vec_perm(a_.m128i_private[0].altivec_u8, - a_.m128i_private[1].altivec_u8, - HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16)); - } - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t index, index16, r, t; - const v128_t mask16 = wasm_i16x8_splat(0x000F); - const v128_t shift16 = wasm_i16x8_splat(0x0202); - const v128_t byte_index16 = wasm_i16x8_splat(0x0100); - const v128_t sixteen = wasm_i8x16_splat(16); - const v128_t a0 = a_.m128i_private[0].wasm_v128; - const v128_t a1 = a_.m128i_private[1].wasm_v128; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - index16 = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask16); - index16 = wasm_i16x8_mul(index16, shift16); - index = wasm_i16x8_add(index16, byte_index16); - r = wasm_i8x16_swizzle(a0, index); - - index = wasm_i8x16_sub(index, sixteen); - t = wasm_i8x16_swizzle(a1, index); - r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[idx_.i16[i] & 0x0F]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutexvar_epi16 - #define _mm256_permutexvar_epi16(idx, a) simde_mm256_permutexvar_epi16(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_permutexvar_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i idx, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutexvar_epi16(src, k, idx, a); - #else - return simde_mm256_mask_mov_epi16(src, k, simde_mm256_permutexvar_epi16(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutexvar_epi16 - #define _mm256_mask_permutexvar_epi16(src, k, idx, a) simde_mm256_mask_permutexvar_epi16(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_permutexvar_epi16 (simde__mmask16 k, simde__m256i idx, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutexvar_epi16(k, idx, a); - #else - return simde_mm256_maskz_mov_epi16(k, simde_mm256_permutexvar_epi16(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutexvar_epi16 - #define _mm256_maskz_permutexvar_epi16(k, idx, a) simde_mm256_maskz_permutexvar_epi16(k, idx, a) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutexvar_epi32 (simde__m256i idx, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutexvar_epi32(idx, a); - #elif defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_permutevar8x32_epi32(a, idx); - #else - simde__m256i_private - idx_ = simde__m256i_to_private(idx), - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x16x2_t table = { { a_.m128i_private[0].neon_u8, - a_.m128i_private[1].neon_u8 } }; - uint32x4_t mask32 = vdupq_n_u32(0x00000007); - uint32x4_t byte_index32 = vdupq_n_u32(0x03020100); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - uint32x4_t index32 = vandq_u32(idx_.m128i_private[i].neon_u32, mask32); - index32 = vmulq_n_u32(index32, 0x04040404); - index32 = vaddq_u32(index32, byte_index32); - r_.m128i_private[i].neon_u8 = vqtbl2q_u8(table, vreinterpretq_u8_u32(index32)); - } - #else - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[idx_.i32[i] & 0x07]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutexvar_epi32 - #define _mm256_permutexvar_epi32(idx, a) simde_mm256_permutexvar_epi32(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_permutexvar_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i idx, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutexvar_epi32(src, k, idx, a); - #else - return simde_mm256_mask_mov_epi32(src, k, simde_mm256_permutexvar_epi32(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutexvar_epi32 - #define _mm256_mask_permutexvar_epi32(src, k, idx, a) simde_mm256_mask_permutexvar_epi32(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_permutexvar_epi32 (simde__mmask8 k, simde__m256i idx, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutexvar_epi32(k, idx, a); - #else - return simde_mm256_maskz_mov_epi32(k, simde_mm256_permutexvar_epi32(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutexvar_epi32 - #define _mm256_maskz_permutexvar_epi32(k, idx, a) simde_mm256_maskz_permutexvar_epi32(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutexvar_epi64 (simde__m256i idx, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutexvar_epi64(idx, a); - #else - simde__m256i_private - idx_ = simde__m256i_to_private(idx), - a_ = simde__m256i_to_private(a), - r_; - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[idx_.i64[i] & 3]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutexvar_epi64 - #define _mm256_permutexvar_epi64(idx, a) 
simde_mm256_permutexvar_epi64(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_permutexvar_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i idx, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutexvar_epi64(src, k, idx, a); - #else - return simde_mm256_mask_mov_epi64(src, k, simde_mm256_permutexvar_epi64(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutexvar_epi64 - #define _mm256_mask_permutexvar_epi64(src, k, idx, a) simde_mm256_mask_permutexvar_epi64(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_permutexvar_epi64 (simde__mmask8 k, simde__m256i idx, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutexvar_epi64(k, idx, a); - #else - return simde_mm256_maskz_mov_epi64(k, simde_mm256_permutexvar_epi64(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutexvar_epi64 - #define _mm256_maskz_permutexvar_epi64(k, idx, a) simde_mm256_maskz_permutexvar_epi64(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutexvar_epi8 (simde__m256i idx, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutexvar_epi8(idx, a); - #elif defined(SIMDE_X86_AVX2_NATIVE) - simde__m256i mask = simde_mm256_set1_epi8(0x0F); - simde__m256i lo = simde_mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - simde__m256i hi = simde_mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - simde__m256i index = simde_mm256_and_si256(idx, mask); - simde__m256i select = simde_mm256_slli_epi64(idx, 3); - lo = simde_mm256_shuffle_epi8(lo, index); - hi = simde_mm256_shuffle_epi8(hi, index); - return simde_mm256_blendv_epi8(lo, hi, select); - #else - simde__m256i_private - idx_ = simde__m256i_to_private(idx), - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x16x2_t table = { { a_.m128i_private[0].neon_u8, - a_.m128i_private[1].neon_u8 } }; - uint8x16_t mask = vdupq_n_u8(0x1F); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - r_.m128i_private[i].neon_u8 = vqtbl2q_u8(table, vandq_u8(idx_.m128i_private[i].neon_u8, mask)); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - r_.m128i_private[i].altivec_u8 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, idx_.m128i_private[i].altivec_u8); - } - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t index, r, t; - const v128_t mask = wasm_i8x16_splat(0x1F); - const v128_t sixteen = wasm_i8x16_splat(16); - const v128_t a0 = a_.m128i_private[0].wasm_v128; - const v128_t a1 = a_.m128i_private[1].wasm_v128; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); - r = wasm_i8x16_swizzle(a0, index); - index = wasm_i8x16_sub(index, sixteen); - t = wasm_i8x16_swizzle(a1, index); - r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < 
(sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[idx_.i8[i] & 0x1F]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutexvar_epi8 - #define _mm256_permutexvar_epi8(idx, a) simde_mm256_permutexvar_epi8(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_permutexvar_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i idx, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutexvar_epi8(src, k, idx, a); - #else - return simde_mm256_mask_mov_epi8(src, k, simde_mm256_permutexvar_epi8(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutexvar_epi8 - #define _mm256_mask_permutexvar_epi8(src, k, idx, a) simde_mm256_mask_permutexvar_epi8(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_permutexvar_epi8 (simde__mmask32 k, simde__m256i idx, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutexvar_epi8(k, idx, a); - #else - return simde_mm256_maskz_mov_epi8(k, simde_mm256_permutexvar_epi8(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutexvar_epi8 - #define _mm256_maskz_permutexvar_epi8(k, idx, a) simde_mm256_maskz_permutexvar_epi8(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permutexvar_pd (simde__m256i idx, simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutexvar_pd(idx, a); - #else - return simde_mm256_castsi256_pd(simde_mm256_permutexvar_epi64(idx, simde_mm256_castpd_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutexvar_pd - #define _mm256_permutexvar_pd(idx, a) simde_mm256_permutexvar_pd(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_permutexvar_pd (simde__m256d src, simde__mmask8 k, simde__m256i idx, simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutexvar_pd(src, k, idx, a); - #else - return simde_mm256_mask_mov_pd(src, k, simde_mm256_permutexvar_pd(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutexvar_pd - #define _mm256_mask_permutexvar_pd(src, k, idx, a) simde_mm256_mask_permutexvar_pd(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskz_permutexvar_pd (simde__mmask8 k, simde__m256i idx, simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutexvar_pd(k, idx, a); - #else - return simde_mm256_maskz_mov_pd(k, simde_mm256_permutexvar_pd(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutexvar_pd - #define _mm256_maskz_permutexvar_pd(k, idx, a) simde_mm256_maskz_permutexvar_pd(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permutexvar_ps (simde__m256i idx, simde__m256 a) { - #if 
defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutexvar_ps(idx, a); - #elif defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_permutevar8x32_ps(a, idx); - #else - return simde_mm256_castsi256_ps(simde_mm256_permutexvar_epi32(idx, simde_mm256_castps_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutexvar_ps - #define _mm256_permutexvar_ps(idx, a) simde_mm256_permutexvar_ps(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_permutexvar_ps (simde__m256 src, simde__mmask8 k, simde__m256i idx, simde__m256 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutexvar_ps(src, k, idx, a); - #else - return simde_mm256_mask_mov_ps(src, k, simde_mm256_permutexvar_ps(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutexvar_ps - #define _mm256_mask_permutexvar_ps(src, k, idx, a) simde_mm256_mask_permutexvar_ps(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskz_permutexvar_ps (simde__mmask8 k, simde__m256i idx, simde__m256 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutexvar_ps(k, idx, a); - #else - return simde_mm256_maskz_mov_ps(k, simde_mm256_permutexvar_ps(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutexvar_ps - #define _mm256_maskz_permutexvar_ps(k, idx, a) simde_mm256_maskz_permutexvar_ps(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutexvar_epi16 (simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_permutexvar_epi16(idx, a); - #else - simde__m512i_private - idx_ = simde__m512i_to_private(idx), - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_AVX2_NATIVE) - simde__m256i t0, t1, index, select, a01, a23; - simde__m256i mask = simde_mm256_set1_epi16(0x001F); - simde__m256i shift = simde_mm256_set1_epi16(0x0202); - simde__m256i byte_index = simde_mm256_set1_epi16(0x0100); - simde__m256i a0 = simde_mm256_broadcastsi128_si256(a_.m128i[0]); - simde__m256i a1 = simde_mm256_broadcastsi128_si256(a_.m128i[1]); - simde__m256i a2 = simde_mm256_broadcastsi128_si256(a_.m128i[2]); - simde__m256i a3 = simde_mm256_broadcastsi128_si256(a_.m128i[3]); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { - index = idx_.m256i[i]; - index = simde_mm256_and_si256(index, mask); - index = simde_mm256_mullo_epi16(index, shift); - index = simde_mm256_add_epi16(index, byte_index); - t0 = simde_mm256_shuffle_epi8(a0, index); - t1 = simde_mm256_shuffle_epi8(a1, index); - select = simde_mm256_slli_epi64(index, 3); - a01 = simde_mm256_blendv_epi8(t0, t1, select); - t0 = simde_mm256_shuffle_epi8(a2, index); - t1 = simde_mm256_shuffle_epi8(a3, index); - a23 = simde_mm256_blendv_epi8(t0, t1, select); - select = simde_mm256_slli_epi64(index, 2); - r_.m256i[i] = simde_mm256_blendv_epi8(a01, a23, select); - } - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x16x4_t table = { { a_.m128i_private[0].neon_u8, - a_.m128i_private[1].neon_u8, - a_.m128i_private[2].neon_u8, - a_.m128i_private[3].neon_u8 } }; - uint16x8_t mask16 = vdupq_n_u16(0x001F); - 
uint16x8_t byte_index16 = vdupq_n_u16(0x0100); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - uint16x8_t index16 = vandq_u16(idx_.m128i_private[i].neon_u16, mask16); - index16 = vmulq_n_u16(index16, 0x0202); - index16 = vaddq_u16(index16, byte_index16); - r_.m128i_private[i].neon_u8 = vqtbl4q_u8(table, vreinterpretq_u8_u16(index16)); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16, mask16, shift16, byte_index16; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) index, test, r01, r23; - mask16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x001F)); - shift16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)); - byte_index16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100)); - test = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x20)); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - index16 = vec_and(idx_.m128i_private[i].altivec_u16, mask16); - index16 = vec_mladd(index16, shift16, byte_index16); - index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16); - r01 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, index); - r23 = vec_perm(a_.m128i_private[2].altivec_u8, a_.m128i_private[3].altivec_u8, index); - r_.m128i_private[i].altivec_u8 = vec_sel(r01, r23, vec_cmpeq(vec_and(index, test), test)); - } - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t index, r, t; - const v128_t mask = wasm_i16x8_splat(0x001F); - const v128_t shift = wasm_i16x8_splat(0x0202); - const v128_t byte_index = wasm_i16x8_splat(0x0100); - const v128_t sixteen = wasm_i8x16_splat(16); - const v128_t a0 = a_.m128i_private[0].wasm_v128; - const v128_t a1 = a_.m128i_private[1].wasm_v128; - const v128_t a2 = a_.m128i_private[2].wasm_v128; - const v128_t a3 = a_.m128i_private[3].wasm_v128; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); - index = wasm_i16x8_mul(index, shift); - index = wasm_i16x8_add(index, byte_index); - r = wasm_i8x16_swizzle(a0, index); - - index = wasm_i8x16_sub(index, sixteen); - t = wasm_i8x16_swizzle(a1, index); - r = wasm_v128_or(r, t); - - index = wasm_i8x16_sub(index, sixteen); - t = wasm_i8x16_swizzle(a2, index); - r = wasm_v128_or(r, t); - - index = wasm_i8x16_sub(index, sixteen); - t = wasm_i8x16_swizzle(a3, index); - r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[idx_.i16[i] & 0x1F]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutexvar_epi16 - #define _mm512_permutexvar_epi16(idx, a) simde_mm512_permutexvar_epi16(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutexvar_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_permutexvar_epi16(src, k, idx, a); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_permutexvar_epi16(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutexvar_epi16 - #define _mm512_mask_permutexvar_epi16(src, k, idx, a) simde_mm512_mask_permutexvar_epi16(src, k, idx, a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutexvar_epi16 (simde__mmask32 k, simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_permutexvar_epi16(k, idx, a); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_permutexvar_epi16(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutexvar_epi16 - #define _mm512_maskz_permutexvar_epi16(k, idx, a) simde_mm512_maskz_permutexvar_epi16(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutexvar_epi32 (simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutexvar_epi32(idx, a); - #else - simde__m512i_private - idx_ = simde__m512i_to_private(idx), - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_AVX2_NATIVE) - simde__m256i index, r0, r1, select; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { - index = idx_.m256i[i]; - r0 = simde_mm256_permutevar8x32_epi32(a_.m256i[0], index); - r1 = simde_mm256_permutevar8x32_epi32(a_.m256i[1], index); - select = simde_mm256_slli_epi32(index, 28); - r_.m256i[i] = simde_mm256_castps_si256(simde_mm256_blendv_ps(simde_mm256_castsi256_ps(r0), - simde_mm256_castsi256_ps(r1), - simde_mm256_castsi256_ps(select))); - } - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x16x4_t table = { { a_.m128i_private[0].neon_u8, - a_.m128i_private[1].neon_u8, - a_.m128i_private[2].neon_u8, - a_.m128i_private[3].neon_u8 } }; - uint32x4_t mask32 = vdupq_n_u32(0x0000000F); - uint32x4_t byte_index32 = vdupq_n_u32(0x03020100); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - uint32x4_t index32 = vandq_u32(idx_.m128i_private[i].neon_u32, mask32); - index32 = vmulq_n_u32(index32, 0x04040404); - index32 = vaddq_u32(index32, byte_index32); - r_.m128i_private[i].neon_u8 = vqtbl4q_u8(table, vreinterpretq_u8_u32(index32)); - } - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) index32, mask32, byte_index32, temp32, sixteen; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) zero, shift; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) index, test, r01, r23; - mask32 = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0x0000000F)); - byte_index32 = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0x03020100)); - zero = vec_splat_u16(0); - shift = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0404)); - sixteen = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16)); - test = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x20)); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - index32 = vec_and(idx_.m128i_private[i].altivec_u32, mask32); - - /* Multiply index32 by 0x04040404; unfortunately vec_mul isn't available so (mis)use 16-bit vec_mladd */ - temp32 = vec_sl(index32, sixteen); - index32 = vec_add(index32, temp32); - index32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), - vec_mladd(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), index32), - shift, - zero)); - - index32 = vec_add(index32, byte_index32); - index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index32); - r01 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, index); - r23 = vec_perm(a_.m128i_private[2].altivec_u8, a_.m128i_private[3].altivec_u8, index); - 
r_.m128i_private[i].altivec_u8 = vec_sel(r01, r23, vec_cmpeq(vec_and(index, test), test)); - } - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t index, r, t; - const v128_t mask = wasm_i32x4_splat(0x0000000F); - const v128_t shift = wasm_i32x4_splat(0x04040404); - const v128_t byte_index = wasm_i32x4_splat(0x03020100); - const v128_t sixteen = wasm_i8x16_splat(16); - const v128_t a0 = a_.m128i_private[0].wasm_v128; - const v128_t a1 = a_.m128i_private[1].wasm_v128; - const v128_t a2 = a_.m128i_private[2].wasm_v128; - const v128_t a3 = a_.m128i_private[3].wasm_v128; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); - index = wasm_i32x4_mul(index, shift); - index = wasm_i32x4_add(index, byte_index); - r = wasm_i8x16_swizzle(a0, index); - - index = wasm_i8x16_sub(index, sixteen); - t = wasm_i8x16_swizzle(a1, index); - r = wasm_v128_or(r, t); - - index = wasm_i8x16_sub(index, sixteen); - t = wasm_i8x16_swizzle(a2, index); - r = wasm_v128_or(r, t); - - index = wasm_i8x16_sub(index, sixteen); - t = wasm_i8x16_swizzle(a3, index); - r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); - } - #else - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[idx_.i32[i] & 0x0F]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutexvar_epi32 - #define _mm512_permutexvar_epi32(idx, a) simde_mm512_permutexvar_epi32(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutexvar_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutexvar_epi32(src, k, idx, a); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_permutexvar_epi32(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutexvar_epi32 - #define _mm512_mask_permutexvar_epi32(src, k, idx, a) simde_mm512_mask_permutexvar_epi32(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutexvar_epi32 (simde__mmask16 k, simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutexvar_epi32(k, idx, a); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_permutexvar_epi32(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutexvar_epi32 - #define _mm512_maskz_permutexvar_epi32(k, idx, a) simde_mm512_maskz_permutexvar_epi32(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutexvar_epi64 (simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutexvar_epi64(idx, a); - #else - simde__m512i_private - idx_ = simde__m512i_to_private(idx), - a_ = simde__m512i_to_private(a), - r_; - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[idx_.i64[i] & 7]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutexvar_epi64 - #define _mm512_permutexvar_epi64(idx, a) simde_mm512_permutexvar_epi64(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutexvar_epi64 (simde__m512i src, 
simde__mmask8 k, simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutexvar_epi64(src, k, idx, a); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_permutexvar_epi64(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutexvar_epi64 - #define _mm512_mask_permutexvar_epi64(src, k, idx, a) simde_mm512_mask_permutexvar_epi64(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutexvar_epi64 (simde__mmask8 k, simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutexvar_epi64(k, idx, a); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_permutexvar_epi64(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutexvar_epi64 - #define _mm512_maskz_permutexvar_epi64(k, idx, a) simde_mm512_maskz_permutexvar_epi64(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutexvar_epi8 (simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_permutexvar_epi8(idx, a); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - simde__m512i hilo, hi, lo, hi2, lo2, idx2; - simde__m512i ones = simde_mm512_set1_epi8(1); - simde__m512i low_bytes = simde_mm512_set1_epi16(0x00FF); - - idx2 = simde_mm512_srli_epi16(idx, 1); - hilo = simde_mm512_permutexvar_epi16(idx2, a); - simde__mmask64 mask = simde_mm512_test_epi8_mask(idx, ones); - lo = simde_mm512_and_si512(hilo, low_bytes); - hi = simde_mm512_srli_epi16(hilo, 8); - - idx2 = simde_mm512_srli_epi16(idx, 9); - hilo = simde_mm512_permutexvar_epi16(idx2, a); - lo2 = simde_mm512_slli_epi16(hilo, 8); - hi2 = simde_mm512_andnot_si512(low_bytes, hilo); - - lo = simde_mm512_or_si512(lo, lo2); - hi = simde_mm512_or_si512(hi, hi2); - - return simde_mm512_mask_blend_epi8(mask, lo, hi); - #else - simde__m512i_private - idx_ = simde__m512i_to_private(idx), - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_AVX2_NATIVE) - simde__m256i t0, t1, index, select, a01, a23; - simde__m256i mask = simde_mm256_set1_epi8(0x3F); - simde__m256i a0 = simde_mm256_broadcastsi128_si256(a_.m128i[0]); - simde__m256i a1 = simde_mm256_broadcastsi128_si256(a_.m128i[1]); - simde__m256i a2 = simde_mm256_broadcastsi128_si256(a_.m128i[2]); - simde__m256i a3 = simde_mm256_broadcastsi128_si256(a_.m128i[3]); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { - index = idx_.m256i[i]; - index = simde_mm256_and_si256(index, mask); - select = simde_mm256_slli_epi64(index, 3); - t0 = simde_mm256_shuffle_epi8(a0, index); - t1 = simde_mm256_shuffle_epi8(a1, index); - a01 = simde_mm256_blendv_epi8(t0, t1, select); - t0 = simde_mm256_shuffle_epi8(a2, index); - t1 = simde_mm256_shuffle_epi8(a3, index); - a23 = simde_mm256_blendv_epi8(t0, t1, select); - select = simde_mm256_slli_epi64(index, 2); - r_.m256i[i] = simde_mm256_blendv_epi8(a01, a23, select); - } - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x16x4_t table = { { a_.m128i_private[0].neon_u8, - a_.m128i_private[1].neon_u8, - a_.m128i_private[2].neon_u8, - a_.m128i_private[3].neon_u8 } }; - uint8x16_t mask = vdupq_n_u8(0x3F); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - r_.m128i_private[i].neon_u8 = vqtbl4q_u8(table, vandq_u8(idx_.m128i_private[i].neon_u8, mask)); - } - 
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) test, r01, r23; - test = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x20)); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - r01 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, idx_.m128i_private[i].altivec_u8); - r23 = vec_perm(a_.m128i_private[2].altivec_u8, a_.m128i_private[3].altivec_u8, idx_.m128i_private[i].altivec_u8); - r_.m128i_private[i].altivec_u8 = vec_sel(r01, r23, vec_cmpeq(vec_and(idx_.m128i_private[i].altivec_u8, test), test)); - } - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t index, r, t; - const v128_t mask = wasm_i8x16_splat(0x3F); - const v128_t sixteen = wasm_i8x16_splat(16); - const v128_t a0 = a_.m128i_private[0].wasm_v128; - const v128_t a1 = a_.m128i_private[1].wasm_v128; - const v128_t a2 = a_.m128i_private[2].wasm_v128; - const v128_t a3 = a_.m128i_private[3].wasm_v128; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); - r = wasm_i8x16_swizzle(a0, index); - - index = wasm_i8x16_sub(index, sixteen); - t = wasm_i8x16_swizzle(a1, index); - r = wasm_v128_or(r, t); - - index = wasm_i8x16_sub(index, sixteen); - t = wasm_i8x16_swizzle(a2, index); - r = wasm_v128_or(r, t); - - index = wasm_i8x16_sub(index, sixteen); - t = wasm_i8x16_swizzle(a3, index); - r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[idx_.i8[i] & 0x3F]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutexvar_epi8 - #define _mm512_permutexvar_epi8(idx, a) simde_mm512_permutexvar_epi8(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutexvar_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_mask_permutexvar_epi8(src, k, idx, a); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_permutexvar_epi8(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutexvar_epi8 - #define _mm512_mask_permutexvar_epi8(src, k, idx, a) simde_mm512_mask_permutexvar_epi8(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutexvar_epi8 (simde__mmask64 k, simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_maskz_permutexvar_epi8(k, idx, a); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_permutexvar_epi8(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutexvar_epi8 - #define _mm512_maskz_permutexvar_epi8(k, idx, a) simde_mm512_maskz_permutexvar_epi8(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_permutexvar_pd (simde__m512i idx, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutexvar_pd(idx, a); - #else - return simde_mm512_castsi512_pd(simde_mm512_permutexvar_epi64(idx, simde_mm512_castpd_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutexvar_pd - #define _mm512_permutexvar_pd(idx, a) simde_mm512_permutexvar_pd(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d 
-simde_mm512_mask_permutexvar_pd (simde__m512d src, simde__mmask8 k, simde__m512i idx, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutexvar_pd(src, k, idx, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_permutexvar_pd(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutexvar_pd - #define _mm512_mask_permutexvar_pd(src, k, idx, a) simde_mm512_mask_permutexvar_pd(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_permutexvar_pd (simde__mmask8 k, simde__m512i idx, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutexvar_pd(k, idx, a); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_permutexvar_pd(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutexvar_pd - #define _mm512_maskz_permutexvar_pd(k, idx, a) simde_mm512_maskz_permutexvar_pd(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_permutexvar_ps (simde__m512i idx, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutexvar_ps(idx, a); - #else - return simde_mm512_castsi512_ps(simde_mm512_permutexvar_epi32(idx, simde_mm512_castps_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutexvar_ps - #define _mm512_permutexvar_ps(idx, a) simde_mm512_permutexvar_ps(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_permutexvar_ph (simde__m512i idx, simde__m512h a) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_permutexvar_ph(idx, a); - #else - return simde_mm512_castsi512_ph(simde_mm512_permutexvar_epi16(idx, simde_mm512_castph_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutexvar_ph - #define _mm512_permutexvar_ph(idx, a) simde_mm512_permutexvar_ph(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_permutexvar_ps (simde__m512 src, simde__mmask16 k, simde__m512i idx, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutexvar_ps(src, k, idx, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_permutexvar_ps(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutexvar_ps - #define _mm512_mask_permutexvar_ps(src, k, idx, a) simde_mm512_mask_permutexvar_ps(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_permutexvar_ps (simde__mmask16 k, simde__m512i idx, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutexvar_ps(k, idx, a); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_permutexvar_ps(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutexvar_ps - #define _mm512_maskz_permutexvar_ps(k, idx, a) simde_mm512_maskz_permutexvar_ps(k, idx, a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_PERMUTEXVAR_H) */ -/* :: End simde/x86/avx512/permutexvar.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/permutex2var.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including 
without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_PERMUTEX2VAR_H) -#define SIMDE_X86_AVX512_PERMUTEX2VAR_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* The following generic code avoids many, nearly identical, repetitions of fairly complex code. - * If the compiler optimizes well, in particular extracting invariant code from loops - * and simplifying code involving constants passed as arguments, it should not be - * significantly slower than specific code. - * Note that when the original vector contains few elements, these implementations - * may not be faster than portable code. 
- */ -#if defined(SIMDE_X86_SSSE3_NATIVE) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_WASM_SIMD128_NATIVE) - #define SIMDE_X_PERMUTEX2VAR_USE_GENERIC -#endif - -#if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_permutex2var128 (const simde__m128i *a, const simde__m128i idx, const simde__m128i *b, const unsigned int log2_index_size, const unsigned int log2_data_length) { - const int idx_mask = (1 << (5 - log2_index_size + log2_data_length)) - 1; - - #if defined(SIMDE_X86_SSE3_NATIVE) - __m128i ra, rb, t, test, select, index; - const __m128i sixteen = _mm_set1_epi8(16); - - /* Avoid the mullo intrinsics which have high latency (and the 32-bit one requires SSE4.1) */ - switch (log2_index_size) { - default: /* Avoid uninitialized variable warning/error */ - case 0: - index = _mm_and_si128(idx, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, idx_mask))); - break; - case 1: - index = _mm_and_si128(idx, _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, idx_mask))); - index = _mm_slli_epi32(index, 1); - t = _mm_slli_epi32(index, 8); - index = _mm_or_si128(index, t); - index = _mm_add_epi16(index, _mm_set1_epi16(0x0100)); - break; - case 2: - index = _mm_and_si128(idx, _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, idx_mask))); - index = _mm_slli_epi32(index, 2); - t = _mm_slli_epi32(index, 8); - index = _mm_or_si128(index, t); - t = _mm_slli_epi32(index, 16); - index = _mm_or_si128(index, t); - index = _mm_add_epi32(index, _mm_set1_epi32(0x03020100)); - break; - } - - test = index; - index = _mm_and_si128(index, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << (4 + log2_data_length)) - 1))); - test = _mm_cmpgt_epi8(test, index); - - ra = _mm_shuffle_epi8(a[0], index); - rb = _mm_shuffle_epi8(b[0], index); - - #if defined(SIMDE_X86_SSE4_1_NATIVE) - SIMDE_VECTORIZE - for (int i = 1 ; i < (1 << log2_data_length) ; i++) { - select = _mm_cmplt_epi8(index, sixteen); - index = _mm_sub_epi8(index, sixteen); - ra = _mm_blendv_epi8(_mm_shuffle_epi8(a[i], index), ra, select); - rb = _mm_blendv_epi8(_mm_shuffle_epi8(b[i], index), rb, select); - } - - return _mm_blendv_epi8(ra, rb, test); - #else - SIMDE_VECTORIZE - for (int i = 1 ; i < (1 << log2_data_length) ; i++) { - select = _mm_cmplt_epi8(index, sixteen); - index = _mm_sub_epi8(index, sixteen); - ra = _mm_or_si128(_mm_andnot_si128(select, _mm_shuffle_epi8(a[i], index)), _mm_and_si128(select, ra)); - rb = _mm_or_si128(_mm_andnot_si128(select, _mm_shuffle_epi8(b[i], index)), _mm_and_si128(select, rb)); - } - - return _mm_or_si128(_mm_andnot_si128(test, ra), _mm_and_si128(test, rb)); - #endif - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x16_t index, r; - uint16x8_t index16; - uint32x4_t index32; - uint8x16x2_t table2_a, table2_b; - uint8x16x4_t table4_a, table4_b; - - switch (log2_index_size) { - case 0: - index = vandq_u8(simde__m128i_to_neon_u8(idx), vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, idx_mask))); - break; - case 1: - index16 = vandq_u16(simde__m128i_to_neon_u16(idx), vdupq_n_u16(HEDLEY_STATIC_CAST(uint16_t, idx_mask))); - index16 = vmulq_n_u16(index16, 0x0202); - index16 = vaddq_u16(index16, vdupq_n_u16(0x0100)); - index = vreinterpretq_u8_u16(index16); - break; - case 2: - index32 = vandq_u32(simde__m128i_to_neon_u32(idx), vdupq_n_u32(HEDLEY_STATIC_CAST(uint32_t, idx_mask))); - index32 = vmulq_n_u32(index32, 0x04040404); - index32 = vaddq_u32(index32, vdupq_n_u32(0x03020100)); - index = vreinterpretq_u8_u32(index32); - break; - } - - uint8x16_t mask = 
vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, (1 << (4 + log2_data_length)) - 1)); - - switch (log2_data_length) { - case 0: - r = vqtbx1q_u8(vqtbl1q_u8(simde__m128i_to_neon_u8(b[0]), vandq_u8(index, mask)), simde__m128i_to_neon_u8(a[0]), index); - break; - case 1: - table2_a.val[0] = simde__m128i_to_neon_u8(a[0]); - table2_a.val[1] = simde__m128i_to_neon_u8(a[1]); - table2_b.val[0] = simde__m128i_to_neon_u8(b[0]); - table2_b.val[1] = simde__m128i_to_neon_u8(b[1]); - r = vqtbx2q_u8(vqtbl2q_u8(table2_b, vandq_u8(index, mask)), table2_a, index); - break; - case 2: - table4_a.val[0] = simde__m128i_to_neon_u8(a[0]); - table4_a.val[1] = simde__m128i_to_neon_u8(a[1]); - table4_a.val[2] = simde__m128i_to_neon_u8(a[2]); - table4_a.val[3] = simde__m128i_to_neon_u8(a[3]); - table4_b.val[0] = simde__m128i_to_neon_u8(b[0]); - table4_b.val[1] = simde__m128i_to_neon_u8(b[1]); - table4_b.val[2] = simde__m128i_to_neon_u8(b[2]); - table4_b.val[3] = simde__m128i_to_neon_u8(b[3]); - r = vqtbx4q_u8(vqtbl4q_u8(table4_b, vandq_u8(index, mask)), table4_a, index); - break; - } - - return simde__m128i_from_neon_u8(r); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r, ra, rb, t, index, s, thirty_two = vec_splats(HEDLEY_STATIC_CAST(uint8_t, 32)); - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) temp32, index32; - SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) select, test; - - switch (log2_index_size) { - default: /* Avoid uninitialized variable warning/error */ - case 0: - index = vec_and(simde__m128i_to_altivec_u8(idx), vec_splats(HEDLEY_STATIC_CAST(uint8_t, idx_mask))); - break; - case 1: - index16 = simde__m128i_to_altivec_u16(idx); - index16 = vec_and(index16, vec_splats(HEDLEY_STATIC_CAST(uint16_t, idx_mask))); - index16 = vec_mladd(index16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)), vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100))); - index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16); - break; - case 2: - index32 = simde__m128i_to_altivec_u32(idx); - index32 = vec_and(index32, vec_splats(HEDLEY_STATIC_CAST(uint32_t, idx_mask))); - - /* Multiply index32 by 0x04040404; unfortunately vec_mul isn't available so (mis)use 16-bit vec_mladd */ - temp32 = vec_sl(index32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16))); - index32 = vec_add(index32, temp32); - index32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), - vec_mladd(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), index32), - vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0404)), - vec_splat_u16(0))); - - index32 = vec_add(index32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0x03020100))); - index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index32); - break; - } - - if (log2_data_length == 0) { - r = vec_perm(simde__m128i_to_altivec_u8(a[0]), simde__m128i_to_altivec_u8(b[0]), HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index)); - } - else { - s = index; - index = vec_and(index, vec_splats(HEDLEY_STATIC_CAST(uint8_t, (1 << (4 + log2_data_length)) - 1))); - test = vec_cmpgt(s, index); - - ra = vec_perm(simde__m128i_to_altivec_u8(a[0]), simde__m128i_to_altivec_u8(a[1]), index); - rb = vec_perm(simde__m128i_to_altivec_u8(b[0]), simde__m128i_to_altivec_u8(b[1]), index); - - SIMDE_VECTORIZE - for (int i = 2 ; i < (1 << log2_data_length) ; i += 2) { - select = vec_cmplt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed 
char), index), - HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), thirty_two)); - index = vec_sub(index, thirty_two); - t = vec_perm(simde__m128i_to_altivec_u8(a[i]), simde__m128i_to_altivec_u8(a[i + 1]), index); - ra = vec_sel(t, ra, select); - t = vec_perm(simde__m128i_to_altivec_u8(b[i]), simde__m128i_to_altivec_u8(b[i + 1]), index); - rb = vec_sel(t, rb, select); - } - - r = vec_sel(ra, rb, test); - } - - return simde__m128i_from_altivec_u8(r); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t sixteen = wasm_i8x16_splat(16); - - v128_t index = simde__m128i_to_wasm_v128(idx); - - switch (log2_index_size) { - case 0: - index = wasm_v128_and(index, wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, idx_mask))); - break; - case 1: - index = wasm_v128_and(index, wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, idx_mask))); - index = wasm_i16x8_mul(index, wasm_i16x8_splat(0x0202)); - index = wasm_i16x8_add(index, wasm_i16x8_splat(0x0100)); - break; - case 2: - index = wasm_v128_and(index, wasm_i32x4_splat(HEDLEY_STATIC_CAST(int32_t, idx_mask))); - index = wasm_i32x4_mul(index, wasm_i32x4_splat(0x04040404)); - index = wasm_i32x4_add(index, wasm_i32x4_splat(0x03020100)); - break; - } - - v128_t r = wasm_i8x16_swizzle(simde__m128i_to_wasm_v128(a[0]), index); - - SIMDE_VECTORIZE - for (int i = 1 ; i < (1 << log2_data_length) ; i++) { - index = wasm_i8x16_sub(index, sixteen); - r = wasm_v128_or(r, wasm_i8x16_swizzle(simde__m128i_to_wasm_v128(a[i]), index)); - } - - SIMDE_VECTORIZE - for (int i = 0 ; i < (1 << log2_data_length) ; i++) { - index = wasm_i8x16_sub(index, sixteen); - r = wasm_v128_or(r, wasm_i8x16_swizzle(simde__m128i_to_wasm_v128(b[i]), index)); - } - - return simde__m128i_from_wasm_v128(r); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_permutex2var (simde__m128i *r, const simde__m128i *a, const simde__m128i *idx, const simde__m128i *b, const unsigned int log2_index_size, const unsigned int log2_data_length) { - SIMDE_VECTORIZE - for (int i = 0 ; i < (1 << log2_data_length) ; i++) { - r[i] = simde_x_permutex2var128(a, idx[i], b, log2_index_size, log2_data_length); - } -} -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_permutex2var_epi16 (simde__m128i a, simde__m128i idx, simde__m128i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_permutex2var_epi16(a, idx, b); - #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde__m128i r; - - simde_x_permutex2var(&r, &a, &idx, &b, 1, 0); - - return r; - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - idx_ = simde__m128i_to_private(idx), - b_ = simde__m128i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((idx_.i16[i] & 8) ? 
b_ : a_).i16[idx_.i16[i] & 7]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_permutex2var_epi16 - #define _mm_permutex2var_epi16(a, idx, b) simde_mm_permutex2var_epi16(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_permutex2var_epi16 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_permutex2var_epi16(a, k, idx, b); - #else - return simde_mm_mask_mov_epi16(a, k, simde_mm_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_permutex2var_epi16 -#define _mm_mask_permutex2var_epi16(a, k, idx, b) simde_mm_mask_permutex2var_epi16(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask2_permutex2var_epi16 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask2_permutex2var_epi16(a, idx, k, b); - #else - return simde_mm_mask_mov_epi16(idx, k, simde_mm_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask2_permutex2var_epi16 -#define _mm_mask2_permutex2var_epi16(a, idx, k, b) simde_mm_mask2_permutex2var_epi16(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_permutex2var_epi16 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_permutex2var_epi16(k, a, idx, b); - #else - return simde_mm_maskz_mov_epi16(k, simde_mm_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_permutex2var_epi16 -#define _mm_maskz_permutex2var_epi16(k, a, idx, b) simde_mm_maskz_permutex2var_epi16(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_permutex2var_epi32 (simde__m128i a, simde__m128i idx, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_permutex2var_epi32(a, idx, b); - #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) /* This may not be faster than the portable version */ - simde__m128i r; - - simde_x_permutex2var(&r, &a, &idx, &b, 2, 0); - - return r; - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - idx_ = simde__m128i_to_private(idx), - b_ = simde__m128i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((idx_.i32[i] & 4) ? 
b_ : a_).i32[idx_.i32[i] & 3]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_permutex2var_epi32 - #define _mm_permutex2var_epi32(a, idx, b) simde_mm_permutex2var_epi32(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_permutex2var_epi32 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_permutex2var_epi32(a, k, idx, b); - #else - return simde_mm_mask_mov_epi32(a, k, simde_mm_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_permutex2var_epi32 -#define _mm_mask_permutex2var_epi32(a, k, idx, b) simde_mm_mask_permutex2var_epi32(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask2_permutex2var_epi32 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask2_permutex2var_epi32(a, idx, k, b); - #else - return simde_mm_mask_mov_epi32(idx, k, simde_mm_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask2_permutex2var_epi32 -#define _mm_mask2_permutex2var_epi32(a, idx, k, b) simde_mm_mask2_permutex2var_epi32(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_permutex2var_epi32 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_permutex2var_epi32(k, a, idx, b); - #else - return simde_mm_maskz_mov_epi32(k, simde_mm_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_permutex2var_epi32 -#define _mm_maskz_permutex2var_epi32(k, a, idx, b) simde_mm_maskz_permutex2var_epi32(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_permutex2var_epi64 (simde__m128i a, simde__m128i idx, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_permutex2var_epi64(a, idx, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - idx_ = simde__m128i_to_private(idx), - b_ = simde__m128i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((idx_.i64[i] & 2) ? 
b_ : a_).i64[idx_.i64[i] & 1]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_permutex2var_epi64 - #define _mm_permutex2var_epi64(a, idx, b) simde_mm_permutex2var_epi64(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_permutex2var_epi64 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_permutex2var_epi64(a, k, idx, b); - #else - return simde_mm_mask_mov_epi64(a, k, simde_mm_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_permutex2var_epi64 -#define _mm_mask_permutex2var_epi64(a, k, idx, b) simde_mm_mask_permutex2var_epi64(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask2_permutex2var_epi64 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask2_permutex2var_epi64(a, idx, k, b); - #else - return simde_mm_mask_mov_epi64(idx, k, simde_mm_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask2_permutex2var_epi64 -#define _mm_mask2_permutex2var_epi64(a, idx, k, b) simde_mm_mask2_permutex2var_epi64(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_permutex2var_epi64(k, a, idx, b); - #else - return simde_mm_maskz_mov_epi64(k, simde_mm_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_permutex2var_epi64 -#define _mm_maskz_permutex2var_epi64(k, a, idx, b) simde_mm_maskz_permutex2var_epi64(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_permutex2var_epi8 (simde__m128i a, simde__m128i idx, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_permutex2var_epi8(a, idx, b); - #elif defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtepi32_epi8(_mm512_permutex2var_epi32(_mm512_cvtepu8_epi32(a), _mm512_cvtepu8_epi32(idx), _mm512_cvtepu8_epi32(b))); - #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde__m128i r; - - simde_x_permutex2var(&r, &a, &idx, &b, 0, 0); - - return r; - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - idx_ = simde__m128i_to_private(idx), - b_ = simde__m128i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((idx_.i8[i] & 0x10) ? 
b_ : a_).i8[idx_.i8[i] & 0x0F]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_permutex2var_epi8 - #define _mm_permutex2var_epi8(a, idx, b) simde_mm_permutex2var_epi8(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_permutex2var_epi8 (simde__m128i a, simde__mmask16 k, simde__m128i idx, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_permutex2var_epi8(a, k, idx, b); - #else - return simde_mm_mask_mov_epi8(a, k, simde_mm_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_permutex2var_epi8 -#define _mm_mask_permutex2var_epi8(a, k, idx, b) simde_mm_mask_permutex2var_epi8(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask2_permutex2var_epi8 (simde__m128i a, simde__m128i idx, simde__mmask16 k, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask2_permutex2var_epi8(a, idx, k, b); - #else - return simde_mm_mask_mov_epi8(idx, k, simde_mm_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask2_permutex2var_epi8 -#define _mm_mask2_permutex2var_epi8(a, idx, k, b) simde_mm_mask2_permutex2var_epi8(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_permutex2var_epi8 (simde__mmask16 k, simde__m128i a, simde__m128i idx, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_permutex2var_epi8(k, a, idx, b); - #else - return simde_mm_maskz_mov_epi8(k, simde_mm_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_permutex2var_epi8 -#define _mm_maskz_permutex2var_epi8(k, a, idx, b) simde_mm_maskz_permutex2var_epi8(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permutex2var_pd (simde__m128d a, simde__m128i idx, simde__m128d b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_permutex2var_pd(a, idx, b); - #else - return simde_mm_castsi128_pd(simde_mm_permutex2var_epi64(simde_mm_castpd_si128(a), idx, simde_mm_castpd_si128(b))); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_permutex2var_pd - #define _mm_permutex2var_pd(a, idx, b) simde_mm_permutex2var_pd(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mask_permutex2var_pd (simde__m128d a, simde__mmask8 k, simde__m128i idx, simde__m128d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_permutex2var_pd(a, k, idx, b); - #else - return simde_mm_mask_mov_pd(a, k, simde_mm_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_permutex2var_pd -#define _mm_mask_permutex2var_pd(a, k, idx, b) simde_mm_mask_permutex2var_pd(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mask2_permutex2var_pd (simde__m128d a, simde__m128i idx, simde__mmask8 k, 
simde__m128d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask2_permutex2var_pd(a, idx, k, b); - #else - return simde_mm_mask_mov_pd(simde_mm_castsi128_pd(idx), k, simde_mm_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask2_permutex2var_pd -#define _mm_mask2_permutex2var_pd(a, idx, k, b) simde_mm_mask2_permutex2var_pd(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_maskz_permutex2var_pd (simde__mmask8 k, simde__m128d a, simde__m128i idx, simde__m128d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_permutex2var_pd(k, a, idx, b); - #else - return simde_mm_maskz_mov_pd(k, simde_mm_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_permutex2var_pd -#define _mm_maskz_permutex2var_pd(k, a, idx, b) simde_mm_maskz_permutex2var_pd(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permutex2var_ps (simde__m128 a, simde__m128i idx, simde__m128 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_permutex2var_ps(a, idx, b); - #else - return simde_mm_castsi128_ps(simde_mm_permutex2var_epi32(simde_mm_castps_si128(a), idx, simde_mm_castps_si128(b))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_permutex2var_ps - #define _mm_permutex2var_ps(a, idx, b) simde_mm_permutex2var_ps(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_permutex2var_ps (simde__m128 a, simde__mmask8 k, simde__m128i idx, simde__m128 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_permutex2var_ps(a, k, idx, b); - #else - return simde_mm_mask_mov_ps(a, k, simde_mm_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_permutex2var_ps -#define _mm_mask_permutex2var_ps(a, k, idx, b) simde_mm_mask_permutex2var_ps(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask2_permutex2var_ps (simde__m128 a, simde__m128i idx, simde__mmask8 k, simde__m128 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask2_permutex2var_ps(a, idx, k, b); - #else - return simde_mm_mask_mov_ps(simde_mm_castsi128_ps(idx), k, simde_mm_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask2_permutex2var_ps -#define _mm_mask2_permutex2var_ps(a, idx, k, b) simde_mm_mask2_permutex2var_ps(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskz_permutex2var_ps (simde__mmask8 k, simde__m128 a, simde__m128i idx, simde__m128 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_permutex2var_ps(k, a, idx, b); - #else - return simde_mm_maskz_mov_ps(k, simde_mm_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_permutex2var_ps -#define _mm_maskz_permutex2var_ps(k, a, idx, b) simde_mm_maskz_permutex2var_ps(k, a, idx, b) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutex2var_epi16 (simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutex2var_epi16(a, idx, b); - #elif defined(SIMDE_X86_AVX2_NATIVE) - __m256i hilo, hilo2, hi, lo, idx2, ta, tb, select; - const __m256i ones = _mm256_set1_epi16(1); - - idx2 = _mm256_srli_epi32(idx, 1); - - ta = _mm256_permutevar8x32_epi32(a, idx2); - tb = _mm256_permutevar8x32_epi32(b, idx2); - select = _mm256_slli_epi32(idx2, 28); - hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - idx2 = _mm256_srli_epi32(idx2, 16); - - ta = _mm256_permutevar8x32_epi32(a, idx2); - tb = _mm256_permutevar8x32_epi32(b, idx2); - select = _mm256_slli_epi32(idx2, 28); - hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - - lo = _mm256_blend_epi16(_mm256_slli_epi32(hilo2, 16), hilo, 0x55); - hi = _mm256_blend_epi16(hilo2, _mm256_srli_epi32(hilo, 16), 0x55); - - select = _mm256_cmpeq_epi16(_mm256_and_si256(idx, ones), ones); - return _mm256_blendv_epi8(lo, hi, select); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - idx_ = simde__m256i_to_private(idx), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 1, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((idx_.i16[i] & 0x10) ? b_ : a_).i16[idx_.i16[i] & 0x0F]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutex2var_epi16 - #define _mm256_permutex2var_epi16(a, idx, b) simde_mm256_permutex2var_epi16(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_permutex2var_epi16 (simde__m256i a, simde__mmask16 k, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutex2var_epi16(a, k, idx, b); - #else - return simde_mm256_mask_mov_epi16(a, k, simde_mm256_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutex2var_epi16 -#define _mm256_mask_permutex2var_epi16(a, k, idx, b) simde_mm256_mask_permutex2var_epi16(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask2_permutex2var_epi16 (simde__m256i a, simde__m256i idx, simde__mmask16 k, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask2_permutex2var_epi16(a, idx, k, b); - #else - return simde_mm256_mask_mov_epi16(idx, k, simde_mm256_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask2_permutex2var_epi16 -#define _mm256_mask2_permutex2var_epi16(a, idx, k, b) simde_mm256_mask2_permutex2var_epi16(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_permutex2var_epi16 (simde__mmask16 k, simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return 
_mm256_maskz_permutex2var_epi16(k, a, idx, b); - #else - return simde_mm256_maskz_mov_epi16(k, simde_mm256_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutex2var_epi16 -#define _mm256_maskz_permutex2var_epi16(k, a, idx, b) simde_mm256_maskz_permutex2var_epi16(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutex2var_epi32 (simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutex2var_epi32(a, idx, b); - #elif defined(SIMDE_X86_AVX2_NATIVE) - __m256i ta, tb, select; - ta = _mm256_permutevar8x32_epi32(a, idx); - tb = _mm256_permutevar8x32_epi32(b, idx); - select = _mm256_slli_epi32(idx, 28); - return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - idx_ = simde__m256i_to_private(idx), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 2, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((idx_.i32[i] & 8) ? b_ : a_).i32[idx_.i32[i] & 7]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutex2var_epi32 - #define _mm256_permutex2var_epi32(a, idx, b) simde_mm256_permutex2var_epi32(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_permutex2var_epi32 (simde__m256i a, simde__mmask8 k, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutex2var_epi32(a, k, idx, b); - #else - return simde_mm256_mask_mov_epi32(a, k, simde_mm256_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutex2var_epi32 -#define _mm256_mask_permutex2var_epi32(a, k, idx, b) simde_mm256_mask_permutex2var_epi32(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask2_permutex2var_epi32 (simde__m256i a, simde__m256i idx, simde__mmask8 k, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask2_permutex2var_epi32(a, idx, k, b); - #else - return simde_mm256_mask_mov_epi32(idx, k, simde_mm256_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask2_permutex2var_epi32 -#define _mm256_mask2_permutex2var_epi32(a, idx, k, b) simde_mm256_mask2_permutex2var_epi32(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_permutex2var_epi32 (simde__mmask8 k, simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutex2var_epi32(k, a, idx, b); - #else - return simde_mm256_maskz_mov_epi32(k, simde_mm256_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef 
_mm256_maskz_permutex2var_epi32 -#define _mm256_maskz_permutex2var_epi32(k, a, idx, b) simde_mm256_maskz_permutex2var_epi32(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutex2var_epi64 (simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutex2var_epi64(a, idx, b); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - idx_ = simde__m256i_to_private(idx), - b_ = simde__m256i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((idx_.i64[i] & 4) ? b_ : a_).i64[idx_.i64[i] & 3]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutex2var_epi64 - #define _mm256_permutex2var_epi64(a, idx, b) simde_mm256_permutex2var_epi64(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_permutex2var_epi64 (simde__m256i a, simde__mmask8 k, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutex2var_epi64(a, k, idx, b); - #else - return simde_mm256_mask_mov_epi64(a, k, simde_mm256_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutex2var_epi64 -#define _mm256_mask_permutex2var_epi64(a, k, idx, b) simde_mm256_mask_permutex2var_epi64(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask2_permutex2var_epi64 (simde__m256i a, simde__m256i idx, simde__mmask8 k, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask2_permutex2var_epi64(a, idx, k, b); - #else - return simde_mm256_mask_mov_epi64(idx, k, simde_mm256_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask2_permutex2var_epi64 -#define _mm256_mask2_permutex2var_epi64(a, idx, k, b) simde_mm256_mask2_permutex2var_epi64(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutex2var_epi64(k, a, idx, b); - #else - return simde_mm256_maskz_mov_epi64(k, simde_mm256_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutex2var_epi64 -#define _mm256_maskz_permutex2var_epi64(k, a, idx, b) simde_mm256_maskz_permutex2var_epi64(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutex2var_epi8 (simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutex2var_epi8(a, idx, b); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cvtepi16_epi8(_mm512_permutex2var_epi16(_mm512_cvtepu8_epi16(a), _mm512_cvtepu8_epi16(idx), _mm512_cvtepu8_epi16(b))); - #elif defined(SIMDE_X86_AVX2_NATIVE) - __m256i t0, t1, index, select0x10, select0x20, a01, b01; - const __m256i mask = _mm256_set1_epi8(0x3F); - const __m256i a0 = 
_mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - const __m256i a1 = _mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - const __m256i b0 = _mm256_permute4x64_epi64(b, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - const __m256i b1 = _mm256_permute4x64_epi64(b, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - - index = _mm256_and_si256(idx, mask); - t0 = _mm256_shuffle_epi8(a0, index); - t1 = _mm256_shuffle_epi8(a1, index); - select0x10 = _mm256_slli_epi64(index, 3); - a01 = _mm256_blendv_epi8(t0, t1, select0x10); - t0 = _mm256_shuffle_epi8(b0, index); - t1 = _mm256_shuffle_epi8(b1, index); - b01 = _mm256_blendv_epi8(t0, t1, select0x10); - select0x20 = _mm256_slli_epi64(index, 2); - return _mm256_blendv_epi8(a01, b01, select0x20); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - idx_ = simde__m256i_to_private(idx), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 0, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((idx_.i8[i] & 0x20) ? b_ : a_).i8[idx_.i8[i] & 0x1F]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutex2var_epi8 - #define _mm256_permutex2var_epi8(a, idx, b) simde_mm256_permutex2var_epi8(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_permutex2var_epi8 (simde__m256i a, simde__mmask32 k, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutex2var_epi8(a, k, idx, b); - #else - return simde_mm256_mask_mov_epi8(a, k, simde_mm256_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutex2var_epi8 -#define _mm256_mask_permutex2var_epi8(a, k, idx, b) simde_mm256_mask_permutex2var_epi8(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask2_permutex2var_epi8 (simde__m256i a, simde__m256i idx, simde__mmask32 k, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask2_permutex2var_epi8(a, idx, k, b); - #else - return simde_mm256_mask_mov_epi8(idx, k, simde_mm256_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask2_permutex2var_epi8 -#define _mm256_mask2_permutex2var_epi8(a, idx, k, b) simde_mm256_mask2_permutex2var_epi8(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_permutex2var_epi8 (simde__mmask32 k, simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutex2var_epi8(k, a, idx, b); - #else - return simde_mm256_maskz_mov_epi8(k, simde_mm256_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutex2var_epi8 -#define _mm256_maskz_permutex2var_epi8(k, a, idx, b) simde_mm256_maskz_permutex2var_epi8(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permutex2var_pd 
(simde__m256d a, simde__m256i idx, simde__m256d b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutex2var_pd(a, idx, b); - #else - return simde_mm256_castsi256_pd(simde_mm256_permutex2var_epi64(simde_mm256_castpd_si256(a), idx, simde_mm256_castpd_si256(b))); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutex2var_pd - #define _mm256_permutex2var_pd(a, idx, b) simde_mm256_permutex2var_pd(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_permutex2var_pd (simde__m256d a, simde__mmask8 k, simde__m256i idx, simde__m256d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutex2var_pd(a, k, idx, b); - #else - return simde_mm256_mask_mov_pd(a, k, simde_mm256_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutex2var_pd -#define _mm256_mask_permutex2var_pd(a, k, idx, b) simde_mm256_mask_permutex2var_pd(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask2_permutex2var_pd (simde__m256d a, simde__m256i idx, simde__mmask8 k, simde__m256d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask2_permutex2var_pd(a, idx, k, b); - #else - return simde_mm256_mask_mov_pd(simde_mm256_castsi256_pd(idx), k, simde_mm256_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask2_permutex2var_pd -#define _mm256_mask2_permutex2var_pd(a, idx, k, b) simde_mm256_mask2_permutex2var_pd(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskz_permutex2var_pd (simde__mmask8 k, simde__m256d a, simde__m256i idx, simde__m256d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutex2var_pd(k, a, idx, b); - #else - return simde_mm256_maskz_mov_pd(k, simde_mm256_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutex2var_pd -#define _mm256_maskz_permutex2var_pd(k, a, idx, b) simde_mm256_maskz_permutex2var_pd(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permutex2var_ps (simde__m256 a, simde__m256i idx, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutex2var_ps(a, idx, b); - #else - return simde_mm256_castsi256_ps(simde_mm256_permutex2var_epi32(simde_mm256_castps_si256(a), idx, simde_mm256_castps_si256(b))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutex2var_ps - #define _mm256_permutex2var_ps(a, idx, b) simde_mm256_permutex2var_ps(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_permutex2var_ps (simde__m256 a, simde__mmask8 k, simde__m256i idx, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutex2var_ps(a, k, idx, b); - #else - return simde_mm256_mask_mov_ps(a, k, simde_mm256_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutex2var_ps -#define _mm256_mask_permutex2var_ps(a, k, idx, b) simde_mm256_mask_permutex2var_ps(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask2_permutex2var_ps (simde__m256 a, simde__m256i idx, simde__mmask8 k, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask2_permutex2var_ps(a, idx, k, b); - #else - return simde_mm256_mask_mov_ps(simde_mm256_castsi256_ps(idx), k, simde_mm256_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask2_permutex2var_ps -#define _mm256_mask2_permutex2var_ps(a, idx, k, b) simde_mm256_mask2_permutex2var_ps(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskz_permutex2var_ps (simde__mmask8 k, simde__m256 a, simde__m256i idx, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutex2var_ps(k, a, idx, b); - #else - return simde_mm256_maskz_mov_ps(k, simde_mm256_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutex2var_ps -#define _mm256_maskz_permutex2var_ps(k, a, idx, b) simde_mm256_maskz_permutex2var_ps(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutex2var_epi16 (simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_permutex2var_epi16(a, idx, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - idx_ = simde__m512i_to_private(idx), - b_ = simde__m512i_to_private(b), - r_; - - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i hilo, hilo1, hilo2, hi, lo, idx1, idx2, ta, tb, select; - const __m256i ones = _mm256_set1_epi16(1); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { - idx1 = idx_.m256i[i]; - idx2 = _mm256_srli_epi32(idx1, 1); - - select = _mm256_slli_epi32(idx2, 27); - ta = _mm256_permutevar8x32_epi32(a_.m256i[0], idx2); - tb = _mm256_permutevar8x32_epi32(b_.m256i[0], idx2); - hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - ta = _mm256_permutevar8x32_epi32(a_.m256i[1], idx2); - tb = _mm256_permutevar8x32_epi32(b_.m256i[1], idx2); - hilo1 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - select = _mm256_add_epi32(select, select); - hilo1 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(hilo), - _mm256_castsi256_ps(hilo1), - _mm256_castsi256_ps(select))); - - idx2 = _mm256_srli_epi32(idx2, 16); - - select = _mm256_slli_epi32(idx2, 27); - ta = _mm256_permutevar8x32_epi32(a_.m256i[0], idx2); - tb = _mm256_permutevar8x32_epi32(b_.m256i[0], idx2); - hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - ta = _mm256_permutevar8x32_epi32(a_.m256i[1], idx2); - tb = _mm256_permutevar8x32_epi32(b_.m256i[1], idx2); - hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - select = _mm256_add_epi32(select, select); - hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(hilo), - 
_mm256_castsi256_ps(hilo2), - _mm256_castsi256_ps(select))); - - lo = _mm256_blend_epi16(_mm256_slli_epi32(hilo2, 16), hilo1, 0x55); - hi = _mm256_blend_epi16(hilo2, _mm256_srli_epi32(hilo1, 16), 0x55); - - select = _mm256_cmpeq_epi16(_mm256_and_si256(idx1, ones), ones); - r_.m256i[i] = _mm256_blendv_epi8(lo, hi, select); - } - #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 1, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((idx_.i16[i] & 0x20) ? b_ : a_).i16[idx_.i16[i] & 0x1F]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_epi16 - #define _mm512_permutex2var_epi16(a, idx, b) simde_mm512_permutex2var_epi16(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutex2var_epi16 (simde__m512i a, simde__mmask32 k, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_permutex2var_epi16(a, k, idx, b); - #else - return simde_mm512_mask_mov_epi16(a, k, simde_mm512_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_epi16 -#define _mm512_mask_permutex2var_epi16(a, k, idx, b) simde_mm512_mask_permutex2var_epi16(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask2_permutex2var_epi16 (simde__m512i a, simde__m512i idx, simde__mmask32 k, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask2_permutex2var_epi16(a, idx, k, b); - #else - return simde_mm512_mask_mov_epi16(idx, k, simde_mm512_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_epi16 -#define _mm512_mask2_permutex2var_epi16(a, idx, k, b) simde_mm512_mask2_permutex2var_epi16(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutex2var_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_permutex2var_epi16(k, a, idx, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_epi16 -#define _mm512_maskz_permutex2var_epi16(k, a, idx, b) simde_mm512_maskz_permutex2var_epi16(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutex2var_epi32(a, idx, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - idx_ = simde__m512i_to_private(idx), - b_ = simde__m512i_to_private(b), - r_; - - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i index, t0, t1, a01, b01, select; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { - index = idx_.m256i[i]; - t0 = _mm256_permutevar8x32_epi32(a_.m256i[0], index); - t1 = _mm256_permutevar8x32_epi32(a_.m256i[1], index); - select = _mm256_slli_epi32(index, 28); - a01 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(t0), - _mm256_castsi256_ps(t1), - _mm256_castsi256_ps(select))); - t0 = _mm256_permutevar8x32_epi32(b_.m256i[0], index); - t1 = 
_mm256_permutevar8x32_epi32(b_.m256i[1], index); - b01 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(t0), - _mm256_castsi256_ps(t1), - _mm256_castsi256_ps(select))); - select = _mm256_slli_epi32(index, 27); - r_.m256i[i] = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a01), - _mm256_castsi256_ps(b01), - _mm256_castsi256_ps(select))); - } - #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 2, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((idx_.i32[i] & 0x10) ? b_ : a_).i32[idx_.i32[i] & 0x0F]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_epi32 - #define _mm512_permutex2var_epi32(a, idx, b) simde_mm512_permutex2var_epi32(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutex2var_epi32 (simde__m512i a, simde__mmask16 k, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutex2var_epi32(a, k, idx, b); - #else - return simde_mm512_mask_mov_epi32(a, k, simde_mm512_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_epi32 -#define _mm512_mask_permutex2var_epi32(a, k, idx, b) simde_mm512_mask_permutex2var_epi32(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask2_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__mmask16 k, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask2_permutex2var_epi32(a, idx, k, b); - #else - return simde_mm512_mask_mov_epi32(idx, k, simde_mm512_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_epi32 -#define _mm512_mask2_permutex2var_epi32(a, idx, k, b) simde_mm512_mask2_permutex2var_epi32(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutex2var_epi32 (simde__mmask16 k, simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutex2var_epi32(k, a, idx, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_epi32 -#define _mm512_maskz_permutex2var_epi32(k, a, idx, b) simde_mm512_maskz_permutex2var_epi32(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutex2var_epi64 (simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutex2var_epi64(a, idx, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - idx_ = simde__m512i_to_private(idx), - b_ = simde__m512i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((idx_.i64[i] & 8) ? 
b_ : a_).i64[idx_.i64[i] & 7]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_epi64 - #define _mm512_permutex2var_epi64(a, idx, b) simde_mm512_permutex2var_epi64(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutex2var_epi64 (simde__m512i a, simde__mmask8 k, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutex2var_epi64(a, k, idx, b); - #else - return simde_mm512_mask_mov_epi64(a, k, simde_mm512_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_epi64 -#define _mm512_mask_permutex2var_epi64(a, k, idx, b) simde_mm512_mask_permutex2var_epi64(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask2_permutex2var_epi64 (simde__m512i a, simde__m512i idx, simde__mmask8 k, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask2_permutex2var_epi64(a, idx, k, b); - #else - return simde_mm512_mask_mov_epi64(idx, k, simde_mm512_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_epi64 -#define _mm512_mask2_permutex2var_epi64(a, idx, k, b) simde_mm512_mask2_permutex2var_epi64(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutex2var_epi64(k, a, idx, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_epi64 -#define _mm512_maskz_permutex2var_epi64(k, a, idx, b) simde_mm512_maskz_permutex2var_epi64(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutex2var_epi8 (simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_permutex2var_epi8(a, idx, b); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - __m512i hilo, hi, lo, hi2, lo2, idx2; - const __m512i ones = _mm512_set1_epi8(1); - const __m512i low_bytes = _mm512_set1_epi16(0x00FF); - - idx2 = _mm512_srli_epi16(idx, 1); - hilo = _mm512_permutex2var_epi16(a, idx2, b); - __mmask64 mask = _mm512_test_epi8_mask(idx, ones); - lo = _mm512_and_si512(hilo, low_bytes); - hi = _mm512_srli_epi16(hilo, 8); - - idx2 = _mm512_srli_epi16(idx, 9); - hilo = _mm512_permutex2var_epi16(a, idx2, b); - lo2 = _mm512_slli_epi16(hilo, 8); - hi2 = _mm512_andnot_si512(low_bytes, hilo); - - lo = _mm512_or_si512(lo, lo2); - hi = _mm512_or_si512(hi, hi2); - - return _mm512_mask_blend_epi8(mask, lo, hi); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - idx_ = simde__m512i_to_private(idx), - b_ = simde__m512i_to_private(b), - r_; - - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i t0, t1, index, select0x10, select0x20, select0x40, t01, t23, a0123, b0123; - const __m256i mask = _mm256_set1_epi8(0x7F); - const __m256i a0 = _mm256_permute4x64_epi64(a_.m256i[0], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - const __m256i a1 = _mm256_permute4x64_epi64(a_.m256i[0], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - const __m256i a2 = _mm256_permute4x64_epi64(a_.m256i[1], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - const __m256i a3 = 
_mm256_permute4x64_epi64(a_.m256i[1], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - const __m256i b0 = _mm256_permute4x64_epi64(b_.m256i[0], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - const __m256i b1 = _mm256_permute4x64_epi64(b_.m256i[0], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - const __m256i b2 = _mm256_permute4x64_epi64(b_.m256i[1], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - const __m256i b3 = _mm256_permute4x64_epi64(b_.m256i[1], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { - index = _mm256_and_si256(idx_.m256i[i], mask); - t0 = _mm256_shuffle_epi8(a0, index); - t1 = _mm256_shuffle_epi8(a1, index); - select0x10 = _mm256_slli_epi64(index, 3); - t01 = _mm256_blendv_epi8(t0, t1, select0x10); - t0 = _mm256_shuffle_epi8(a2, index); - t1 = _mm256_shuffle_epi8(a3, index); - t23 = _mm256_blendv_epi8(t0, t1, select0x10); - select0x20 = _mm256_slli_epi64(index, 2); - a0123 = _mm256_blendv_epi8(t01, t23, select0x20); - t0 = _mm256_shuffle_epi8(b0, index); - t1 = _mm256_shuffle_epi8(b1, index); - t01 = _mm256_blendv_epi8(t0, t1, select0x10); - t0 = _mm256_shuffle_epi8(b2, index); - t1 = _mm256_shuffle_epi8(b3, index); - t23 = _mm256_blendv_epi8(t0, t1, select0x10); - b0123 = _mm256_blendv_epi8(t01, t23, select0x20); - select0x40 = _mm256_slli_epi64(index, 1); - r_.m256i[i] = _mm256_blendv_epi8(a0123, b0123, select0x40); - } - #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((idx_.i8[i] & 0x40) ? b_ : a_).i8[idx_.i8[i] & 0x3F]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_epi8 - #define _mm512_permutex2var_epi8(a, idx, b) simde_mm512_permutex2var_epi8(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutex2var_epi8 (simde__m512i a, simde__mmask64 k, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_mask_permutex2var_epi8(a, k, idx, b); - #else - return simde_mm512_mask_mov_epi8(a, k, simde_mm512_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_epi8 -#define _mm512_mask_permutex2var_epi8(a, k, idx, b) simde_mm512_mask_permutex2var_epi8(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask2_permutex2var_epi8 (simde__m512i a, simde__m512i idx, simde__mmask64 k, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_mask2_permutex2var_epi8(a, idx, k, b); - #else - return simde_mm512_mask_mov_epi8(idx, k, simde_mm512_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_epi8 -#define _mm512_mask2_permutex2var_epi8(a, idx, k, b) simde_mm512_mask2_permutex2var_epi8(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutex2var_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_maskz_permutex2var_epi8(k, a, idx, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef 
_mm512_maskz_permutex2var_epi8 -#define _mm512_maskz_permutex2var_epi8(k, a, idx, b) simde_mm512_maskz_permutex2var_epi8(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_permutex2var_pd (simde__m512d a, simde__m512i idx, simde__m512d b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_permutex2var_pd(a, idx, b); - #else - return simde_mm512_castsi512_pd(simde_mm512_permutex2var_epi64(simde_mm512_castpd_si512(a), idx, simde_mm512_castpd_si512(b))); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_pd - #define _mm512_permutex2var_pd(a, idx, b) simde_mm512_permutex2var_pd(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_permutex2var_pd (simde__m512d a, simde__mmask8 k, simde__m512i idx, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutex2var_pd(a, k, idx, b); - #else - return simde_mm512_mask_mov_pd(a, k, simde_mm512_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_pd -#define _mm512_mask_permutex2var_pd(a, k, idx, b) simde_mm512_mask_permutex2var_pd(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask2_permutex2var_pd (simde__m512d a, simde__m512i idx, simde__mmask8 k, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask2_permutex2var_pd(a, idx, k, b); - #else - return simde_mm512_mask_mov_pd(simde_mm512_castsi512_pd(idx), k, simde_mm512_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_pd -#define _mm512_mask2_permutex2var_pd(a, idx, k, b) simde_mm512_mask2_permutex2var_pd(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_permutex2var_pd (simde__mmask8 k, simde__m512d a, simde__m512i idx, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutex2var_pd(k, a, idx, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_pd -#define _mm512_maskz_permutex2var_pd(k, a, idx, b) simde_mm512_maskz_permutex2var_pd(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutex2var_ps(a, idx, b); - #else - return simde_mm512_castsi512_ps(simde_mm512_permutex2var_epi32(simde_mm512_castps_si512(a), idx, simde_mm512_castps_si512(b))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_ps - #define _mm512_permutex2var_ps(a, idx, b) simde_mm512_permutex2var_ps(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_permutex2var_ps (simde__m512 a, simde__mmask16 k, simde__m512i idx, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutex2var_ps(a, k, idx, b); - #else - return simde_mm512_mask_mov_ps(a, k, simde_mm512_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_ps -#define _mm512_mask_permutex2var_ps(a, k, idx, b) simde_mm512_mask_permutex2var_ps(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask2_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__mmask16 k, simde__m512 b) { - #if 
defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask2_permutex2var_ps(a, idx, k, b); - #else - return simde_mm512_mask_mov_ps(simde_mm512_castsi512_ps(idx), k, simde_mm512_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_ps -#define _mm512_mask2_permutex2var_ps(a, idx, k, b) simde_mm512_mask2_permutex2var_ps(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_permutex2var_ps (simde__mmask16 k, simde__m512 a, simde__m512i idx, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutex2var_ps(k, a, idx, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_ps -#define _mm512_maskz_permutex2var_ps(k, a, idx, b) simde_mm512_maskz_permutex2var_ps(k, a, idx, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_PERMUTEX2VAR_H) */ -/* :: End simde/x86/avx512/permutex2var.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/popcnt.h :: */ -#if !defined(SIMDE_X86_AVX512_POPCNT_H) -#define SIMDE_X86_AVX512_POPCNT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_popcnt_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_popcnt_epi8(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vcntq_s8(a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_popcnt(a_.wasm_v128); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - const __m128i low_nibble_set = _mm_set1_epi8(0x0f); - const __m128i high_nibble_of_input = _mm_andnot_si128(low_nibble_set, a_.n); - const __m128i low_nibble_of_input = _mm_and_si128(low_nibble_set, a_.n); - const __m128i lut = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); - - r_.n = - _mm_add_epi8( - _mm_shuffle_epi8( - lut, - low_nibble_of_input - ), - _mm_shuffle_epi8( - lut, - _mm_srli_epi16( - high_nibble_of_input, - 4 - ) - ) - ); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* v -= ((v >> 1) & UINT8_C(0x55)); */ - r_.n = - _mm_sub_epi8( - a_.n, - _mm_and_si128( - _mm_srli_epi16(a_.n, 1), - _mm_set1_epi8(0x55) - ) - ); - - /* v = (v & 0x33) + ((v >> 2) & 0x33); */ - r_.n = - _mm_add_epi8( - _mm_and_si128( - r_.n, - _mm_set1_epi8(0x33) - ), - _mm_and_si128( - _mm_srli_epi16(r_.n, 2), - _mm_set1_epi8(0x33) - ) - ); - - /* v = (v + (v >> 4)) & 0xf */ - r_.n = - _mm_and_si128( - _mm_add_epi8( - r_.n, - _mm_srli_epi16(r_.n, 4) - ), - _mm_set1_epi8(0x0f) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), a_.altivec_i8))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - a_.u8 -= ((a_.u8 >> 1) & 0x55); - a_.u8 = ((a_.u8 & 0x33) + ((a_.u8 >> 2) & 0x33)); - a_.u8 = (a_.u8 + (a_.u8 >> 4)) & 15; - r_.u8 = a_.u8 >> ((sizeof(uint8_t) - 1) * CHAR_BIT); - #else - SIMDE_VECTORIZE 
- for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - uint8_t v = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i]); - v -= ((v >> 1) & 0x55); - v = (v & 0x33) + ((v >> 2) & 0x33); - v = (v + (v >> 4)) & 0xf; - r_.u8[i] = v >> (sizeof(uint8_t) - 1) * CHAR_BIT; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_popcnt_epi8 - #define _mm_popcnt_epi8(a) simde_mm_popcnt_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_popcnt_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_popcnt_epi8(src, k, a); - #else - return simde_mm_mask_mov_epi8(src, k, simde_mm_popcnt_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_popcnt_epi8 - #define _mm_mask_popcnt_epi8(src, k, a) simde_mm_mask_popcnt_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_popcnt_epi8 (simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_popcnt_epi8(k, a); - #else - return simde_mm_maskz_mov_epi8(k, simde_mm_popcnt_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_popcnt_epi8 - #define _mm_maskz_popcnt_epi8(k, a) simde_mm_maskz_popcnt_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_popcnt_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_popcnt_epi16(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vpaddlq_s8(vcntq_s8(a_.neon_i8)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_extadd_pairwise_i8x16(wasm_i8x16_popcnt(a_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_u16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), a_.altivec_u16))); - #elif defined(SIMDE_X86_XOP_NATIVE) - const __m128i low_nibble_set = _mm_set1_epi8(0x0f); - const __m128i high_nibble_of_input = _mm_andnot_si128(low_nibble_set, a_.n); - const __m128i low_nibble_of_input = _mm_and_si128(low_nibble_set, a_.n); - const __m128i lut = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); - - r_.n = - _mm_haddw_epi8( - _mm_add_epi8( - _mm_shuffle_epi8( - lut, - low_nibble_of_input - ), - _mm_shuffle_epi8( - lut, - _mm_srli_epi16(high_nibble_of_input, 4) - ) - ) - ); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.n = - _mm_sub_epi16( - a_.n, - _mm_and_si128( - _mm_srli_epi16(a_.n, 1), - _mm_set1_epi16(0x5555) - ) - ); - - r_.n = - _mm_add_epi16( - _mm_and_si128( - r_.n, - _mm_set1_epi16(0x3333) - ), - _mm_and_si128( - _mm_srli_epi16(r_.n, 2), - _mm_set1_epi16(0x3333) - ) - ); - - r_.n = - _mm_and_si128( - _mm_add_epi16( - r_.n, - _mm_srli_epi16(r_.n, 4) - ), - _mm_set1_epi16(0x0f0f) - ); - - r_.n = - _mm_srli_epi16( - _mm_mullo_epi16( - r_.n, - _mm_set1_epi16(0x0101) - ), - (sizeof(uint16_t) - 1) * CHAR_BIT - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - a_.u16 -= ((a_.u16 >> 1) & UINT16_C(0x5555)); - a_.u16 = ((a_.u16 & UINT16_C(0x3333)) + 
((a_.u16 >> 2) & UINT16_C(0x3333))); - a_.u16 = (a_.u16 + (a_.u16 >> 4)) & UINT16_C(0x0f0f); - r_.u16 = (a_.u16 * UINT16_C(0x0101)) >> ((sizeof(uint16_t) - 1) * CHAR_BIT); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - uint16_t v = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i]); - v -= ((v >> 1) & UINT16_C(0x5555)); - v = ((v & UINT16_C(0x3333)) + ((v >> 2) & UINT16_C(0x3333))); - v = (v + (v >> 4)) & UINT16_C(0x0f0f); - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (v * UINT16_C(0x0101))) >> ((sizeof(uint16_t) - 1) * CHAR_BIT); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_popcnt_epi16 - #define _mm_popcnt_epi16(a) simde_mm_popcnt_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_popcnt_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_popcnt_epi16(src, k, a); - #else - return simde_mm_mask_mov_epi16(src, k, simde_mm_popcnt_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_popcnt_epi16 - #define _mm_mask_popcnt_epi16(src, k, a) simde_mm_mask_popcnt_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_popcnt_epi16 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_popcnt_epi16(k, a); - #else - return simde_mm_maskz_mov_epi16(k, simde_mm_popcnt_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_popcnt_epi16 - #define _mm_maskz_popcnt_epi16(k, a) simde_mm_maskz_popcnt_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_popcnt_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_popcnt_epi32(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vpaddlq_s16(vpaddlq_s8(vcntq_s8(a_.neon_i8))); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_u32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), a_.altivec_u32))); - #elif defined(SIMDE_X86_XOP_NATIVE) - const __m128i low_nibble_set = _mm_set1_epi8(0x0f); - const __m128i high_nibble_of_input = _mm_andnot_si128(low_nibble_set, a_.n); - const __m128i low_nibble_of_input = _mm_and_si128(low_nibble_set, a_.n); - const __m128i lut = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); - - r_.n = - _mm_haddd_epi8( - _mm_add_epi8( - _mm_shuffle_epi8( - lut, - low_nibble_of_input - ), - _mm_shuffle_epi8( - lut, - _mm_srli_epi16(high_nibble_of_input, 4) - ) - ) - ); - #elif defined(SIMDE_X86_SSE4_1_NATIVE) - r_.n = - _mm_sub_epi32( - a_.n, - _mm_and_si128( - _mm_srli_epi32(a_.n, 1), - _mm_set1_epi32(0x55555555) - ) - ); - - r_.n = - _mm_add_epi32( - _mm_and_si128( - r_.n, - _mm_set1_epi32(0x33333333) - ), - _mm_and_si128( - _mm_srli_epi32(r_.n, 2), - _mm_set1_epi32(0x33333333) - ) - ); - - r_.n = - _mm_and_si128( - _mm_add_epi32( - r_.n, - _mm_srli_epi32(r_.n, 4) - ), - _mm_set1_epi32(0x0f0f0f0f) - ); - - r_.n = - _mm_srli_epi32( - 
_mm_mullo_epi32( - r_.n, - _mm_set1_epi32(0x01010101) - ), - (sizeof(uint32_t) - 1) * CHAR_BIT - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - a_.u32 -= ((a_.u32 >> 1) & UINT32_C(0x55555555)); - a_.u32 = ((a_.u32 & UINT32_C(0x33333333)) + ((a_.u32 >> 2) & UINT32_C(0x33333333))); - a_.u32 = (a_.u32 + (a_.u32 >> 4)) & UINT32_C(0x0f0f0f0f); - r_.u32 = (a_.u32 * UINT32_C(0x01010101)) >> ((sizeof(uint32_t) - 1) * CHAR_BIT); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - uint32_t v = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i]); - v -= ((v >> 1) & UINT32_C(0x55555555)); - v = ((v & UINT32_C(0x33333333)) + ((v >> 2) & UINT32_C(0x33333333))); - v = (v + (v >> 4)) & UINT32_C(0x0f0f0f0f); - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (v * UINT32_C(0x01010101))) >> ((sizeof(uint32_t) - 1) * CHAR_BIT); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_popcnt_epi32 - #define _mm_popcnt_epi32(a) simde_mm_popcnt_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_popcnt_epi32 (simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_popcnt_epi32(src, k, a); - #else - return simde_mm_mask_mov_epi32(src, k, simde_mm_popcnt_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_popcnt_epi32 - #define _mm_mask_popcnt_epi32(src, k, a) simde_mm_mask_popcnt_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_popcnt_epi32 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_popcnt_epi32(k, a); - #else - return simde_mm_maskz_mov_epi32(k, simde_mm_popcnt_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_popcnt_epi32 - #define _mm_maskz_popcnt_epi32(k, a) simde_mm_maskz_popcnt_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_popcnt_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_popcnt_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vpaddlq_s32(vpaddlq_s16(vpaddlq_s8(vcntq_s8(a_.neon_i8)))); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), a_.altivec_u64))); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - const __m128i low_nibble_set = _mm_set1_epi8(0x0f); - const __m128i high_nibble_of_input = _mm_andnot_si128(low_nibble_set, a_.n); - const __m128i low_nibble_of_input = _mm_and_si128(low_nibble_set, a_.n); - const __m128i lut = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); - - r_.n = - _mm_sad_epu8( - _mm_add_epi8( - _mm_shuffle_epi8( - lut, - low_nibble_of_input - ), - _mm_shuffle_epi8( - lut, - _mm_srli_epi16(high_nibble_of_input, 4) - ) - ), - _mm_setzero_si128() - ); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.n = - _mm_sub_epi8( - a_.n, - _mm_and_si128( - _mm_srli_epi16(a_.n, 1), - _mm_set1_epi8(0x55) - 
) - ); - - r_.n = - _mm_add_epi8( - _mm_and_si128( - r_.n, - _mm_set1_epi8(0x33) - ), - _mm_and_si128( - _mm_srli_epi16(r_.n, 2), - _mm_set1_epi8(0x33) - ) - ); - - r_.n = - _mm_and_si128( - _mm_add_epi8( - r_.n, - _mm_srli_epi16(r_.n, 4) - ), - _mm_set1_epi8(0x0f) - ); - - r_.n = - _mm_sad_epu8( - r_.n, - _mm_setzero_si128() - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - a_.u64 -= ((a_.u64 >> 1) & UINT64_C(0x5555555555555555)); - a_.u64 = ((a_.u64 & UINT64_C(0x3333333333333333)) + ((a_.u64 >> 2) & UINT64_C(0x3333333333333333))); - a_.u64 = (a_.u64 + (a_.u64 >> 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); - r_.u64 = (a_.u64 * UINT64_C(0x0101010101010101)) >> ((sizeof(uint64_t) - 1) * CHAR_BIT); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - uint64_t v = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i]); - v -= ((v >> 1) & UINT64_C(0x5555555555555555)); - v = ((v & UINT64_C(0x3333333333333333)) + ((v >> 2) & UINT64_C(0x3333333333333333))); - v = (v + (v >> 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, (v * UINT64_C(0x0101010101010101))) >> ((sizeof(uint64_t) - 1) * CHAR_BIT); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_popcnt_epi64 - #define _mm_popcnt_epi64(a) simde_mm_popcnt_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_popcnt_epi64 (simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_popcnt_epi64(src, k, a); - #else - return simde_mm_mask_mov_epi64(src, k, simde_mm_popcnt_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_popcnt_epi64 - #define _mm_mask_popcnt_epi64(src, k, a) simde_mm_mask_popcnt_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_popcnt_epi64 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_popcnt_epi64(k, a); - #else - return simde_mm_maskz_mov_epi64(k, simde_mm_popcnt_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_popcnt_epi64 - #define _mm_maskz_popcnt_epi64(k, a) simde_mm_maskz_popcnt_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_popcnt_epi8 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_popcnt_epi8(a); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_popcnt_epi8(a_.m128i[i]); - } - #elif defined(SIMDE_X86_AVX2_NATIVE) - const __m256i low_nibble_set = _mm256_set1_epi8(0x0f); - const __m256i high_nibble_of_input = _mm256_andnot_si256(low_nibble_set, a_.n); - const __m256i low_nibble_of_input = _mm256_and_si256(low_nibble_set, a_.n); - const __m256i lut = - _mm256_set_epi8( - 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, - 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0 - ); - - r_.n = - _mm256_add_epi8( - _mm256_shuffle_epi8( - lut, - low_nibble_of_input - ), - _mm256_shuffle_epi8( - lut, - _mm256_srli_epi16( - 
high_nibble_of_input, - 4 - ) - ) - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - a_.u8 -= ((a_.u8 >> 1) & 0x55); - a_.u8 = ((a_.u8 & 0x33) + ((a_.u8 >> 2) & 0x33)); - a_.u8 = (a_.u8 + (a_.u8 >> 4)) & 15; - r_.u8 = a_.u8 >> ((sizeof(uint8_t) - 1) * CHAR_BIT); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - uint8_t v = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i]); - v -= ((v >> 1) & 0x55); - v = (v & 0x33) + ((v >> 2) & 0x33); - v = (v + (v >> 4)) & 0xf; - r_.u8[i] = v >> (sizeof(uint8_t) - 1) * CHAR_BIT; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_popcnt_epi8 - #define _mm256_popcnt_epi8(a) simde_mm256_popcnt_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_popcnt_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_popcnt_epi8(src, k, a); - #else - return simde_mm256_mask_mov_epi8(src, k, simde_mm256_popcnt_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_popcnt_epi8 - #define _mm256_mask_popcnt_epi8(src, k, a) simde_mm256_mask_popcnt_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_popcnt_epi8 (simde__mmask32 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_popcnt_epi8(k, a); - #else - return simde_mm256_maskz_mov_epi8(k, simde_mm256_popcnt_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_popcnt_epi8 - #define _mm256_maskz_popcnt_epi8(k, a) simde_mm256_maskz_popcnt_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_popcnt_epi16 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_popcnt_epi16(a); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_popcnt_epi16(a_.m128i[i]); - } - #elif defined(SIMDE_X86_AVX2_NATIVE) - r_.n = - _mm256_sub_epi16( - a_.n, - _mm256_and_si256( - _mm256_srli_epi16(a_.n, 1), - _mm256_set1_epi16(0x5555) - ) - ); - - r_.n = - _mm256_add_epi16( - _mm256_and_si256( - r_.n, - _mm256_set1_epi16(0x3333) - ), - _mm256_and_si256( - _mm256_srli_epi16(r_.n, 2), - _mm256_set1_epi16(0x3333) - ) - ); - - r_.n = - _mm256_and_si256( - _mm256_add_epi16( - r_.n, - _mm256_srli_epi16(r_.n, 4) - ), - _mm256_set1_epi16(0x0f0f) - ); - - r_.n = - _mm256_srli_epi16( - _mm256_mullo_epi16( - r_.n, - _mm256_set1_epi16(0x0101) - ), - (sizeof(uint16_t) - 1) * CHAR_BIT - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - a_.u16 -= ((a_.u16 >> 1) & UINT16_C(0x5555)); - a_.u16 = ((a_.u16 & UINT16_C(0x3333)) + ((a_.u16 >> 2) & UINT16_C(0x3333))); - a_.u16 = (a_.u16 + (a_.u16 >> 4)) & UINT16_C(0x0f0f); - r_.u16 = (a_.u16 * UINT16_C(0x0101)) >> ((sizeof(uint16_t) - 1) * CHAR_BIT); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - uint16_t v = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i]); - v -= ((v >> 1) & UINT16_C(0x5555)); - v = ((v & 
UINT16_C(0x3333)) + ((v >> 2) & UINT16_C(0x3333))); - v = (v + (v >> 4)) & UINT16_C(0x0f0f); - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (v * UINT16_C(0x0101))) >> ((sizeof(uint16_t) - 1) * CHAR_BIT); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_popcnt_epi16 - #define _mm256_popcnt_epi16(a) simde_mm256_popcnt_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_popcnt_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_popcnt_epi16(src, k, a); - #else - return simde_mm256_mask_mov_epi16(src, k, simde_mm256_popcnt_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_popcnt_epi16 - #define _mm256_mask_popcnt_epi16(src, k, a) simde_mm256_mask_popcnt_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_popcnt_epi16 (simde__mmask16 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_popcnt_epi16(k, a); - #else - return simde_mm256_maskz_mov_epi16(k, simde_mm256_popcnt_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_popcnt_epi16 - #define _mm256_maskz_popcnt_epi16(k, a) simde_mm256_maskz_popcnt_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_popcnt_epi32 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_popcnt_epi32(a); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_popcnt_epi32(a_.m128i[i]); - } - #elif defined(SIMDE_X86_AVX2_NATIVE) - r_.n = - _mm256_sub_epi32( - a_.n, - _mm256_and_si256( - _mm256_srli_epi32(a_.n, 1), - _mm256_set1_epi32(0x55555555) - ) - ); - - r_.n = - _mm256_add_epi32( - _mm256_and_si256( - r_.n, - _mm256_set1_epi32(0x33333333) - ), - _mm256_and_si256( - _mm256_srli_epi32(r_.n, 2), - _mm256_set1_epi32(0x33333333) - ) - ); - - r_.n = - _mm256_and_si256( - _mm256_add_epi32( - r_.n, - _mm256_srli_epi32(r_.n, 4) - ), - _mm256_set1_epi32(0x0f0f0f0f) - ); - - r_.n = - _mm256_srli_epi32( - _mm256_mullo_epi32( - r_.n, - _mm256_set1_epi32(0x01010101) - ), - (sizeof(uint32_t) - 1) * CHAR_BIT - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - a_.u32 -= ((a_.u32 >> 1) & UINT32_C(0x55555555)); - a_.u32 = ((a_.u32 & UINT32_C(0x33333333)) + ((a_.u32 >> 2) & UINT32_C(0x33333333))); - a_.u32 = (a_.u32 + (a_.u32 >> 4)) & UINT32_C(0x0f0f0f0f); - r_.u32 = (a_.u32 * UINT32_C(0x01010101)) >> ((sizeof(uint32_t) - 1) * CHAR_BIT); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - uint32_t v = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i]); - v -= ((v >> 1) & UINT32_C(0x55555555)); - v = ((v & UINT32_C(0x33333333)) + ((v >> 2) & UINT32_C(0x33333333))); - v = (v + (v >> 4)) & UINT32_C(0x0f0f0f0f); - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (v * UINT32_C(0x01010101))) >> ((sizeof(uint32_t) - 1) * CHAR_BIT); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_popcnt_epi32 - #define _mm256_popcnt_epi32(a) simde_mm256_popcnt_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_popcnt_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_popcnt_epi32(src, k, a); - #else - return simde_mm256_mask_mov_epi32(src, k, simde_mm256_popcnt_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_popcnt_epi32 - #define _mm256_mask_popcnt_epi32(src, k, a) simde_mm256_mask_popcnt_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_popcnt_epi32 (simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_popcnt_epi32(k, a); - #else - return simde_mm256_maskz_mov_epi32(k, simde_mm256_popcnt_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_popcnt_epi32 - #define _mm256_maskz_popcnt_epi32(k, a) simde_mm256_maskz_popcnt_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_popcnt_epi64 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_popcnt_epi64(a); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < sizeof(r_.m128i) / sizeof(r_.m128i[0]) ; i++) { - r_.m128i[i] = simde_mm_popcnt_epi64(a_.m128i[i]); - } - #elif defined(SIMDE_X86_AVX2_NATIVE) - const __m256i low_nibble_set = _mm256_set1_epi8(0x0f); - const __m256i high_nibble_of_input = _mm256_andnot_si256(low_nibble_set, a_.n); - const __m256i low_nibble_of_input = _mm256_and_si256(low_nibble_set, a_.n); - const __m256i lut = - _mm256_set_epi8( - 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, - 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0 - ); - - r_.n = - _mm256_sad_epu8( - _mm256_add_epi8( - _mm256_shuffle_epi8( - lut, - low_nibble_of_input - ), - _mm256_shuffle_epi8( - lut, - _mm256_srli_epi16(high_nibble_of_input, 4) - ) - ), - _mm256_setzero_si256() - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - a_.u64 -= ((a_.u64 >> 1) & UINT64_C(0x5555555555555555)); - a_.u64 = ((a_.u64 & UINT64_C(0x3333333333333333)) + ((a_.u64 >> 2) & UINT64_C(0x3333333333333333))); - a_.u64 = (a_.u64 + (a_.u64 >> 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); - r_.u64 = (a_.u64 * UINT64_C(0x0101010101010101)) >> ((sizeof(uint64_t) - 1) * CHAR_BIT); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - uint64_t v = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i]); - v -= ((v >> 1) & UINT64_C(0x5555555555555555)); - v = ((v & UINT64_C(0x3333333333333333)) + ((v >> 2) & UINT64_C(0x3333333333333333))); - v = (v + (v >> 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, (v * UINT64_C(0x0101010101010101))) >> ((sizeof(uint64_t) - 1) * CHAR_BIT); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_popcnt_epi64 - #define _mm256_popcnt_epi64(a) simde_mm256_popcnt_epi64(a) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_popcnt_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_popcnt_epi64(src, k, a); - #else - return simde_mm256_mask_mov_epi64(src, k, simde_mm256_popcnt_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_popcnt_epi64 - #define _mm256_mask_popcnt_epi64(src, k, a) simde_mm256_mask_popcnt_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_popcnt_epi64 (simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_popcnt_epi64(k, a); - #else - return simde_mm256_maskz_mov_epi64(k, simde_mm256_popcnt_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_popcnt_epi64 - #define _mm256_maskz_popcnt_epi64(k, a) simde_mm256_maskz_popcnt_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_popcnt_epi8 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) - return _mm512_popcnt_epi8(a); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_popcnt_epi8(a_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_popcnt_epi8(a_.m256i[i]); - } - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - const __m512i low_nibble_set = _mm512_set1_epi8(0x0f); - const __m512i high_nibble_of_input = _mm512_andnot_si512(low_nibble_set, a_.n); - const __m512i low_nibble_of_input = _mm512_and_si512(low_nibble_set, a_.n); - const __m512i lut = - simde_mm512_set_epi8( - 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, - 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, - 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, - 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0 - ); - - r_.n = - _mm512_add_epi8( - _mm512_shuffle_epi8( - lut, - low_nibble_of_input - ), - _mm512_shuffle_epi8( - lut, - _mm512_srli_epi16( - high_nibble_of_input, - 4 - ) - ) - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - a_.u8 -= ((a_.u8 >> 1) & 0x55); - a_.u8 = ((a_.u8 & 0x33) + ((a_.u8 >> 2) & 0x33)); - a_.u8 = (a_.u8 + (a_.u8 >> 4)) & 15; - r_.u8 = a_.u8 >> ((sizeof(uint8_t) - 1) * CHAR_BIT); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - uint8_t v = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i]); - v -= ((v >> 1) & 0x55); - v = (v & 0x33) + ((v >> 2) & 0x33); - v = (v + (v >> 4)) & 0xf; - r_.u8[i] = v >> (sizeof(uint8_t) - 1) * CHAR_BIT; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) - #undef _mm512_popcnt_epi8 - #define _mm512_popcnt_epi8(a) simde_mm512_popcnt_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_popcnt_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) - return _mm512_mask_popcnt_epi8(src, k, a); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_popcnt_epi8(a)); - #endif -} -#if 
defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_popcnt_epi8 - #define _mm512_mask_popcnt_epi8(src, k, a) simde_mm512_mask_popcnt_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_popcnt_epi8 (simde__mmask64 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) - return _mm512_maskz_popcnt_epi8(k, a); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_popcnt_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_popcnt_epi8 - #define _mm512_maskz_popcnt_epi8(k, a) simde_mm512_maskz_popcnt_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_popcnt_epi16 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) - return _mm512_popcnt_epi16(a); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_popcnt_epi16(a_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_popcnt_epi16(a_.m256i[i]); - } - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - r_.n = - _mm512_sub_epi16( - a_.n, - _mm512_and_si512( - _mm512_srli_epi16(a_.n, 1), - _mm512_set1_epi16(0x5555) - ) - ); - - r_.n = - _mm512_add_epi16( - _mm512_and_si512( - r_.n, - _mm512_set1_epi16(0x3333) - ), - _mm512_and_si512( - _mm512_srli_epi16(r_.n, 2), - _mm512_set1_epi16(0x3333) - ) - ); - - r_.n = - _mm512_and_si512( - _mm512_add_epi16( - r_.n, - _mm512_srli_epi16(r_.n, 4) - ), - _mm512_set1_epi16(0x0f0f) - ); - - r_.n = - _mm512_srli_epi16( - _mm512_mullo_epi16( - r_.n, - _mm512_set1_epi16(0x0101) - ), - (sizeof(uint16_t) - 1) * CHAR_BIT - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - a_.u16 -= ((a_.u16 >> 1) & UINT16_C(0x5555)); - a_.u16 = ((a_.u16 & UINT16_C(0x3333)) + ((a_.u16 >> 2) & UINT16_C(0x3333))); - a_.u16 = (a_.u16 + (a_.u16 >> 4)) & UINT16_C(0x0f0f); - r_.u16 = (a_.u16 * UINT16_C(0x0101)) >> ((sizeof(uint16_t) - 1) * CHAR_BIT); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - uint16_t v = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i]); - v -= ((v >> 1) & UINT16_C(0x5555)); - v = ((v & UINT16_C(0x3333)) + ((v >> 2) & UINT16_C(0x3333))); - v = (v + (v >> 4)) & UINT16_C(0x0f0f); - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (v * UINT16_C(0x0101))) >> ((sizeof(uint16_t) - 1) * CHAR_BIT); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) - #undef _mm512_popcnt_epi16 - #define _mm512_popcnt_epi16(a) simde_mm512_popcnt_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_popcnt_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) - return _mm512_mask_popcnt_epi16(src, k, a); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_popcnt_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_popcnt_epi16 - #define _mm512_mask_popcnt_epi16(src, k, a) simde_mm512_mask_popcnt_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_popcnt_epi16 (simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BITALG_NATIVE) - return _mm512_maskz_popcnt_epi16(k, a); - #else - return 
simde_mm512_maskz_mov_epi16(k, simde_mm512_popcnt_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_popcnt_epi16 - #define _mm512_maskz_popcnt_epi16(k, a) simde_mm512_maskz_popcnt_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_popcnt_epi32 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - return _mm512_popcnt_epi32(a); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_popcnt_epi32(a_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_popcnt_epi32(a_.m256i[i]); - } - #elif defined(SIMDE_X86_AVX512F_NATIVE) - r_.n = - _mm512_sub_epi32( - a_.n, - _mm512_and_si512( - _mm512_srli_epi32(a_.n, 1), - _mm512_set1_epi32(0x55555555) - ) - ); - - r_.n = - _mm512_add_epi32( - _mm512_and_si512( - r_.n, - _mm512_set1_epi32(0x33333333) - ), - _mm512_and_si512( - _mm512_srli_epi32(r_.n, 2), - _mm512_set1_epi32(0x33333333) - ) - ); - - r_.n = - _mm512_and_si512( - _mm512_add_epi32( - r_.n, - _mm512_srli_epi32(r_.n, 4) - ), - _mm512_set1_epi32(0x0f0f0f0f) - ); - - r_.n = - _mm512_srli_epi32( - _mm512_mullo_epi32( - r_.n, - _mm512_set1_epi32(0x01010101) - ), - (sizeof(uint32_t) - 1) * CHAR_BIT - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - a_.u32 -= ((a_.u32 >> 1) & UINT32_C(0x55555555)); - a_.u32 = ((a_.u32 & UINT32_C(0x33333333)) + ((a_.u32 >> 2) & UINT32_C(0x33333333))); - a_.u32 = (a_.u32 + (a_.u32 >> 4)) & UINT32_C(0x0f0f0f0f); - r_.u32 = (a_.u32 * UINT32_C(0x01010101)) >> ((sizeof(uint32_t) - 1) * CHAR_BIT); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - uint32_t v = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i]); - v -= ((v >> 1) & UINT32_C(0x55555555)); - v = ((v & UINT32_C(0x33333333)) + ((v >> 2) & UINT32_C(0x33333333))); - v = (v + (v >> 4)) & UINT32_C(0x0f0f0f0f); - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (v * UINT32_C(0x01010101))) >> ((sizeof(uint32_t) - 1) * CHAR_BIT); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_popcnt_epi32 - #define _mm512_popcnt_epi32(a) simde_mm512_popcnt_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_popcnt_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - return _mm512_mask_popcnt_epi32(src, k, a); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_popcnt_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_popcnt_epi32 - #define _mm512_mask_popcnt_epi32(src, k, a) simde_mm512_mask_popcnt_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_popcnt_epi32 (simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - return _mm512_maskz_popcnt_epi32(k, a); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_popcnt_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_popcnt_epi32 - #define _mm512_maskz_popcnt_epi32(k, a) simde_mm512_maskz_popcnt_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_popcnt_epi64 (simde__m512i a) { - #if 
defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - return _mm512_popcnt_epi64(a); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_popcnt_epi64(a_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < sizeof(r_.m256i) / sizeof(r_.m256i[0]) ; i++) { - r_.m256i[i] = simde_mm256_popcnt_epi64(a_.m256i[i]); - } - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - const __m512i low_nibble_set = _mm512_set1_epi8(0x0f); - const __m512i high_nibble_of_input = _mm512_andnot_si512(low_nibble_set, a_.n); - const __m512i low_nibble_of_input = _mm512_and_si512(low_nibble_set, a_.n); - const __m512i lut = - simde_mm512_set_epi8( - 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, - 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, - 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, - 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0 - ); - - r_.n = - _mm512_sad_epu8( - _mm512_add_epi8( - _mm512_shuffle_epi8( - lut, - low_nibble_of_input - ), - _mm512_shuffle_epi8( - lut, - _mm512_srli_epi16(high_nibble_of_input, 4) - ) - ), - _mm512_setzero_si512() - ); - #elif defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - r_.n = - _mm512_sub_epi64( - a_.n, - _mm512_and_si512( - _mm512_srli_epi64(a_.n, 1), - _mm512_set1_epi64(0x5555555555555555) - ) - ); - - r_.n = - _mm512_add_epi64( - _mm512_and_si512( - r_.n, - _mm512_set1_epi64(0x3333333333333333) - ), - _mm512_and_si512( - _mm512_srli_epi64(r_.n, 2), - _mm512_set1_epi64(0x3333333333333333) - ) - ); - - r_.n = - _mm512_and_si512( - _mm512_add_epi64( - r_.n, - _mm512_srli_epi64(r_.n, 4) - ), - _mm512_set1_epi64(0x0f0f0f0f0f0f0f0f) - ); - - r_.n = - _mm512_srli_epi64( - _mm512_mullo_epi64( - r_.n, - _mm512_set1_epi64(0x0101010101010101) - ), - (sizeof(uint64_t) - 1) * CHAR_BIT - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - a_.u64 -= ((a_.u64 >> 1) & UINT64_C(0x5555555555555555)); - a_.u64 = ((a_.u64 & UINT64_C(0x3333333333333333)) + ((a_.u64 >> 2) & UINT64_C(0x3333333333333333))); - a_.u64 = (a_.u64 + (a_.u64 >> 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); - r_.u64 = (a_.u64 * UINT64_C(0x0101010101010101)) >> ((sizeof(uint64_t) - 1) * CHAR_BIT); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - uint64_t v = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i]); - v -= ((v >> 1) & UINT64_C(0x5555555555555555)); - v = ((v & UINT64_C(0x3333333333333333)) + ((v >> 2) & UINT64_C(0x3333333333333333))); - v = (v + (v >> 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, (v * UINT64_C(0x0101010101010101))) >> ((sizeof(uint64_t) - 1) * CHAR_BIT); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_popcnt_epi64 - #define _mm512_popcnt_epi64(a) simde_mm512_popcnt_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_popcnt_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - return _mm512_mask_popcnt_epi64(src, k, a); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_popcnt_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_popcnt_epi64 - #define _mm512_mask_popcnt_epi64(src, k, a) simde_mm512_mask_popcnt_epi64(src, k, a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_popcnt_epi64 (simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - return _mm512_maskz_popcnt_epi64(k, a); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_popcnt_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_popcnt_epi64 - #define _mm512_maskz_popcnt_epi64(k, a) simde_mm512_maskz_popcnt_epi64(k, a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_POPCNT_H) */ -/* :: End simde/x86/avx512/popcnt.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/range.h :: */ -#if !defined(SIMDE_X86_AVX512_RANGE_H) -#define SIMDE_X86_AVX512_RANGE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_range_ps (simde__m128 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128 r; - - switch (imm8 & 3) { - case 0: - r = simde_mm_min_ps(a, b); - break; - case 1: - r = simde_mm_max_ps(a, b); - break; - case 2: - r = simde_x_mm_select_ps(b, a, simde_mm_cmple_ps(simde_x_mm_abs_ps(a), simde_x_mm_abs_ps(b))); - break; - case 3: - r = simde_x_mm_select_ps(b, a, simde_mm_cmpge_ps(simde_x_mm_abs_ps(a), simde_x_mm_abs_ps(b))); - break; - default: - break; - } - - switch (imm8 & 12) { - case 0: - r = simde_x_mm_copysign_ps(r, a); - break; - case 8: - r = simde_mm_andnot_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.0)), r); - break; - case 12: - r = simde_mm_or_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.0)), r); - break; - default: - break; - } - - return r; -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_range_ps(a, b, imm8) _mm_range_ps((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_range_ps - #define _mm_range_ps(a, b, imm8) simde_mm_range_ps(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_mask_range_ps(src, k, a, b, imm8) _mm_mask_range_ps(src, k, a, b, imm8) -#else - #define simde_mm_mask_range_ps(src, k, a, b, imm8) simde_mm_mask_mov_ps(src, k, simde_mm_range_ps(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_range_ps - #define _mm_mask_range_ps(src, k, a, b, imm8) simde_mm_mask_range_ps(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_maskz_range_ps(k, a, b, imm8) _mm_maskz_range_ps(k, a, b, imm8) -#else - #define simde_mm_maskz_range_ps(k, a, b, imm8) simde_mm_maskz_mov_ps(k, simde_mm_range_ps(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_range_ps - #define _mm_maskz_range_ps(k, a, b, imm8) simde_mm_maskz_range_ps(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_range_ps (simde__m256 a, simde__m256 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256 r; - - switch (imm8 & 3) { - case 0: - r = simde_mm256_min_ps(a, b); - break; - case 1: - r = simde_mm256_max_ps(a, b); - break; - case 2: - r = simde_x_mm256_select_ps(b, a, simde_mm256_cmp_ps(simde_x_mm256_abs_ps(a), simde_x_mm256_abs_ps(b), SIMDE_CMP_LE_OQ)); - break; - case 3: - r = simde_x_mm256_select_ps(b, a, simde_mm256_cmp_ps(simde_x_mm256_abs_ps(a), simde_x_mm256_abs_ps(b), SIMDE_CMP_GE_OQ)); - break; - default: - break; - } - - switch (imm8 & 12) { - case 0: - r = simde_x_mm256_copysign_ps(r, a); - break; - case 8: - r = simde_mm256_andnot_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0)), r); - break; - case 12: - r = simde_mm256_or_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0)), r); - break; - default: - break; - } - - return r; -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_range_ps(a, b, imm8) _mm256_range_ps((a), (b), (imm8)) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm256_range_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256_private \ - simde_mm256_range_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ - simde_mm256_range_ps_a_ = simde__m256_to_private(a), \ - simde_mm256_range_ps_b_ = simde__m256_to_private(b); \ - \ - for (size_t simde_mm256_range_ps_i = 0 ; simde_mm256_range_ps_i < (sizeof(simde_mm256_range_ps_r_.m128) / sizeof(simde_mm256_range_ps_r_.m128[0])) ; simde_mm256_range_ps_i++) { \ - simde_mm256_range_ps_r_.m128[simde_mm256_range_ps_i] = simde_mm_range_ps(simde_mm256_range_ps_a_.m128[simde_mm256_range_ps_i], simde_mm256_range_ps_b_.m128[simde_mm256_range_ps_i], imm8); \ - } \ - \ - simde__m256_from_private(simde_mm256_range_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_range_ps - #define _mm256_range_ps(a, b, imm8) simde_mm256_range_ps(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_mask_range_ps(src, k, a, b, imm8) _mm256_mask_range_ps(src, k, a, b, imm8) -#else - #define simde_mm256_mask_range_ps(src, k, a, b, imm8) simde_mm256_mask_mov_ps(src, k, simde_mm256_range_ps(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_range_ps - #define _mm256_mask_range_ps(src, k, a, b, imm8) simde_mm256_mask_range_ps(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_maskz_range_ps(k, a, b, imm8) _mm256_maskz_range_ps(k, a, b, imm8) -#else - #define 
simde_mm256_maskz_range_ps(k, a, b, imm8) simde_mm256_maskz_mov_ps(k, simde_mm256_range_ps(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_range_ps - #define _mm256_maskz_range_ps(k, a, b, imm8) simde_mm256_maskz_range_ps(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_range_ps (simde__m512 a, simde__m512 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m512 r; - - switch (imm8 & 3) { - case 0: - r = simde_mm512_min_ps(a, b); - break; - case 1: - r = simde_mm512_max_ps(a, b); - break; - case 2: - r = simde_mm512_mask_mov_ps(b, simde_mm512_cmp_ps_mask(simde_mm512_abs_ps(a), simde_mm512_abs_ps(b), SIMDE_CMP_LE_OS), a); - break; - case 3: - r = simde_mm512_mask_mov_ps(a, simde_mm512_cmp_ps_mask(simde_mm512_abs_ps(b), simde_mm512_abs_ps(a), SIMDE_CMP_GE_OS), b); - break; - default: - break; - } - - switch (imm8 & 12) { - case 0: - r = simde_x_mm512_copysign_ps(r, a); - break; - case 8: - r = simde_mm512_andnot_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.0)), r); - break; - case 12: - r = simde_mm512_or_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.0)), r); - break; - default: - break; - } - - return r; -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm512_range_ps(a, b, imm8) _mm512_range_ps((a), (b), (imm8)) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_range_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512_private \ - simde_mm512_range_ps_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ - simde_mm512_range_ps_a_ = simde__m512_to_private(a), \ - simde_mm512_range_ps_b_ = simde__m512_to_private(b); \ - \ - for (size_t simde_mm512_range_ps_i = 0 ; simde_mm512_range_ps_i < (sizeof(simde_mm512_range_ps_r_.m128) / sizeof(simde_mm512_range_ps_r_.m128[0])) ; simde_mm512_range_ps_i++) { \ - simde_mm512_range_ps_r_.m128[simde_mm512_range_ps_i] = simde_mm_range_ps(simde_mm512_range_ps_a_.m128[simde_mm512_range_ps_i], simde_mm512_range_ps_b_.m128[simde_mm512_range_ps_i], imm8); \ - } \ - \ - simde__m512_from_private(simde_mm512_range_ps_r_); \ - })) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_range_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512_private \ - simde_mm512_range_ps_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ - simde_mm512_range_ps_a_ = simde__m512_to_private(a), \ - simde_mm512_range_ps_b_ = simde__m512_to_private(b); \ - \ - for (size_t simde_mm512_range_ps_i = 0 ; simde_mm512_range_ps_i < (sizeof(simde_mm512_range_ps_r_.m256) / sizeof(simde_mm512_range_ps_r_.m256[0])) ; simde_mm512_range_ps_i++) { \ - simde_mm512_range_ps_r_.m256[simde_mm512_range_ps_i] = simde_mm256_range_ps(simde_mm512_range_ps_a_.m256[simde_mm512_range_ps_i], simde_mm512_range_ps_b_.m256[simde_mm512_range_ps_i], imm8); \ - } \ - \ - simde__m512_from_private(simde_mm512_range_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_range_ps - #define _mm512_range_ps(a, b, imm8) simde_mm512_range_ps(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_mask_range_ps(src, k, a, b, imm8) _mm512_mask_range_ps(src, k, a, b, imm8) -#else - #define simde_mm512_mask_range_ps(src, k, a, b, imm8) simde_mm512_mask_mov_ps(src, k, simde_mm512_range_ps(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef 
_mm512_mask_range_ps - #define _mm512_mask_range_ps(src, k, a, b, imm8) simde_mm512_mask_range_ps(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_maskz_range_ps(k, a, b, imm8) _mm512_maskz_range_ps(k, a, b, imm8) -#else - #define simde_mm512_maskz_range_ps(k, a, b, imm8) simde_mm512_maskz_mov_ps(k, simde_mm512_range_ps(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_range_ps - #define _mm512_maskz_range_ps(k, a, b, imm8) simde_mm512_maskz_range_ps(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_range_pd (simde__m128d a, simde__m128d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128d r; - - switch (imm8 & 3) { - case 0: - r = simde_mm_min_pd(a, b); - break; - case 1: - r = simde_mm_max_pd(a, b); - break; - case 2: - r = simde_x_mm_select_pd(b, a, simde_mm_cmple_pd(simde_x_mm_abs_pd(a), simde_x_mm_abs_pd(b))); - break; - case 3: - r = simde_x_mm_select_pd(b, a, simde_mm_cmpge_pd(simde_x_mm_abs_pd(a), simde_x_mm_abs_pd(b))); - break; - default: - break; - } - - switch (imm8 & 12) { - case 0: - r = simde_x_mm_copysign_pd(r, a); - break; - case 8: - r = simde_mm_andnot_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.0)), r); - break; - case 12: - r = simde_mm_or_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.0)), r); - break; - default: - break; - } - - return r; -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_range_pd(a, b, imm8) _mm_range_pd((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_range_pd - #define _mm_range_pd(a, b, imm8) simde_mm_range_pd(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_mask_range_pd(src, k, a, b, imm8) _mm_mask_range_pd(src, k, a, b, imm8) -#else - #define simde_mm_mask_range_pd(src, k, a, b, imm8) simde_mm_mask_mov_pd(src, k, simde_mm_range_pd(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_range_pd - #define _mm_mask_range_pd(src, k, a, b, imm8) simde_mm_mask_range_pd(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_maskz_range_pd(k, a, b, imm8) _mm_maskz_range_pd(k, a, b, imm8) -#else - #define simde_mm_maskz_range_pd(k, a, b, imm8) simde_mm_maskz_mov_pd(k, simde_mm_range_pd(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_range_pd - #define _mm_maskz_range_pd(k, a, b, imm8) simde_mm_maskz_range_pd(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_range_pd (simde__m256d a, simde__m256d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d r; - - switch (imm8 & 3) { - case 0: - r = simde_mm256_min_pd(a, b); - break; - case 1: - r = simde_mm256_max_pd(a, b); - break; - case 2: - r = simde_x_mm256_select_pd(b, a, simde_mm256_cmp_pd(simde_x_mm256_abs_pd(a), simde_x_mm256_abs_pd(b), SIMDE_CMP_LE_OQ)); - break; - case 3: - r = simde_x_mm256_select_pd(b, a, simde_mm256_cmp_pd(simde_x_mm256_abs_pd(a), simde_x_mm256_abs_pd(b), SIMDE_CMP_GE_OQ)); - break; - default: - break; - } - - switch (imm8 & 12) { - case 0: - r = simde_x_mm256_copysign_pd(r, a); - break; - case 8: - r = 
simde_mm256_andnot_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.0)), r); - break; - case 12: - r = simde_mm256_or_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.0)), r); - break; - default: - break; - } - - return r; -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_range_pd(a, b, imm8) _mm256_range_pd((a), (b), (imm8)) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm256_range_pd(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256d_private \ - simde_mm256_range_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ - simde_mm256_range_pd_a_ = simde__m256d_to_private(a), \ - simde_mm256_range_pd_b_ = simde__m256d_to_private(b); \ - \ - for (size_t simde_mm256_range_pd_i = 0 ; simde_mm256_range_pd_i < (sizeof(simde_mm256_range_pd_r_.m128d) / sizeof(simde_mm256_range_pd_r_.m128d[0])) ; simde_mm256_range_pd_i++) { \ - simde_mm256_range_pd_r_.m128d[simde_mm256_range_pd_i] = simde_mm_range_pd(simde_mm256_range_pd_a_.m128d[simde_mm256_range_pd_i], simde_mm256_range_pd_b_.m128d[simde_mm256_range_pd_i], imm8); \ - } \ - \ - simde__m256d_from_private(simde_mm256_range_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_range_pd - #define _mm256_range_pd(a, b, imm8) simde_mm256_range_pd(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_mask_range_pd(src, k, a, b, imm8) _mm256_mask_range_pd(src, k, a, b, imm8) -#else - #define simde_mm256_mask_range_pd(src, k, a, b, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm256_range_pd(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_range_pd - #define _mm256_mask_range_pd(src, k, a, b, imm8) simde_mm256_mask_range_pd(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_maskz_range_pd(k, a, b, imm8) _mm256_maskz_range_pd(k, a, b, imm8) -#else - #define simde_mm256_maskz_range_pd(k, a, b, imm8) simde_mm256_maskz_mov_pd(k, simde_mm256_range_pd(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_range_pd - #define _mm256_maskz_range_pd(k, a, b, imm8) simde_mm256_maskz_range_pd(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_range_pd (simde__m512d a, simde__m512d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m512d r; - - switch (imm8 & 3) { - case 0: - r = simde_mm512_min_pd(a, b); - break; - case 1: - r = simde_mm512_max_pd(a, b); - break; - case 2: - r = simde_mm512_mask_mov_pd(b, simde_mm512_cmp_pd_mask(simde_mm512_abs_pd(a), simde_mm512_abs_pd(b), SIMDE_CMP_LE_OS), a); - break; - case 3: - r = simde_mm512_mask_mov_pd(a, simde_mm512_cmp_pd_mask(simde_mm512_abs_pd(b), simde_mm512_abs_pd(a), SIMDE_CMP_GE_OS), b); - break; - default: - break; - } - - switch (imm8 & 12) { - case 0: - r = simde_x_mm512_copysign_pd(r, a); - break; - case 8: - r = simde_mm512_andnot_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.0)), r); - break; - case 12: - r = simde_mm512_or_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.0)), r); - break; - default: - break; - } - - return r; -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm512_range_pd(a, b, imm8) 
_mm512_range_pd((a), (b), (imm8)) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_range_pd(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d_private \ - simde_mm512_range_pd_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ - simde_mm512_range_pd_a_ = simde__m512d_to_private(a), \ - simde_mm512_range_pd_b_ = simde__m512d_to_private(b); \ - \ - for (size_t simde_mm512_range_pd_i = 0 ; simde_mm512_range_pd_i < (sizeof(simde_mm512_range_pd_r_.m128d) / sizeof(simde_mm512_range_pd_r_.m128d[0])) ; simde_mm512_range_pd_i++) { \ - simde_mm512_range_pd_r_.m128d[simde_mm512_range_pd_i] = simde_mm_range_pd(simde_mm512_range_pd_a_.m128d[simde_mm512_range_pd_i], simde_mm512_range_pd_b_.m128d[simde_mm512_range_pd_i], imm8); \ - } \ - \ - simde__m512d_from_private(simde_mm512_range_pd_r_); \ - })) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_range_pd(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d_private \ - simde_mm512_range_pd_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ - simde_mm512_range_pd_a_ = simde__m512d_to_private(a), \ - simde_mm512_range_pd_b_ = simde__m512d_to_private(b); \ - \ - for (size_t simde_mm512_range_pd_i = 0 ; simde_mm512_range_pd_i < (sizeof(simde_mm512_range_pd_r_.m256d) / sizeof(simde_mm512_range_pd_r_.m256d[0])) ; simde_mm512_range_pd_i++) { \ - simde_mm512_range_pd_r_.m256d[simde_mm512_range_pd_i] = simde_mm256_range_pd(simde_mm512_range_pd_a_.m256d[simde_mm512_range_pd_i], simde_mm512_range_pd_b_.m256d[simde_mm512_range_pd_i], imm8); \ - } \ - \ - simde__m512d_from_private(simde_mm512_range_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_range_pd - #define _mm512_range_pd(a, b, imm8) simde_mm512_range_pd(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_mask_range_pd(src, k, a, b, imm8) _mm512_mask_range_pd(src, k, a, b, imm8) -#else - #define simde_mm512_mask_range_pd(src, k, a, b, imm8) simde_mm512_mask_mov_pd(src, k, simde_mm512_range_pd(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_range_pd - #define _mm512_mask_range_pd(src, k, a, b, imm8) simde_mm512_mask_range_pd(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_maskz_range_pd(k, a, b, imm8) _mm512_maskz_range_pd(k, a, b, imm8) -#else - #define simde_mm512_maskz_range_pd(k, a, b, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_range_pd(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_range_pd - #define _mm512_maskz_range_pd(k, a, b, imm8) simde_mm512_maskz_range_pd(k, a, b, imm8) -#endif - -#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - #define simde_x_mm_range_ss(a, b, imm8) simde_mm_move_ss(a, simde_mm_range_ps(a, b, imm8)) -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - #define simde_x_mm_range_ss(a, b, imm8) simde_mm_move_ss(a, simde_mm_range_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b), imm8)) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde_x_mm_range_ss (simde__m128 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128_private - r_ = simde__m128_to_private(a), - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - simde_float32 abs_a = simde_uint32_as_float32(a_.u32[0] & UINT32_C(2147483647)); - simde_float32 abs_b = simde_uint32_as_float32(b_.u32[0] & 
UINT32_C(2147483647)); - - switch (imm8 & 3) { - case 0: - r_ = simde__m128_to_private(simde_mm_min_ss(a, b)); - break; - case 1: - r_ = simde__m128_to_private(simde_mm_max_ss(a, b)); - break; - case 2: - r_.f32[0] = abs_a <= abs_b ? a_.f32[0] : b_.f32[0]; - break; - case 3: - r_.f32[0] = abs_b >= abs_a ? b_.f32[0] : a_.f32[0]; - break; - default: - break; - } - - switch (imm8 & 12) { - case 0: - r_.f32[0] = simde_uint32_as_float32((a_.u32[0] & UINT32_C(2147483648)) ^ (r_.u32[0] & UINT32_C(2147483647))); - break; - case 8: - r_.f32[0] = simde_uint32_as_float32(r_.u32[0] & UINT32_C(2147483647)); - break; - case 12: - r_.f32[0] = simde_uint32_as_float32(r_.u32[0] | UINT32_C(2147483648)); - break; - default: - break; - } - - return simde__m128_from_private(r_); - } -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm_mask_range_ss(src, k, a, b, imm8) _mm_mask_range_ss(src, k, a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_mask_range_ss(src, k, a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128_private \ - simde_mm_mask_range_ss_r_ = simde__m128_to_private(a), \ - simde_mm_mask_range_ss_src_ = simde__m128_to_private(src); \ - \ - if (k & 1) \ - simde_mm_mask_range_ss_r_ = simde__m128_to_private(simde_x_mm_range_ss(a, b, imm8)); \ - else \ - simde_mm_mask_range_ss_r_.f32[0] = simde_mm_mask_range_ss_src_.f32[0]; \ - \ - simde__m128_from_private(simde_mm_mask_range_ss_r_); \ - })) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde_mm_mask_range_ss (simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128_private - r_ = simde__m128_to_private(a), - src_ = simde__m128_to_private(src); - - if (k & 1) - r_ = simde__m128_to_private(simde_x_mm_range_ss(a, b, imm8)); - else - r_.f32[0] = src_.f32[0]; - - return simde__m128_from_private(r_); - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_range_ss - #define _mm_mask_range_ss(src, k, a, b, imm8) simde_mm_mask_range_ss(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm_maskz_range_ss(k, a, b, imm8) _mm_maskz_range_ss(k, a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_maskz_range_ss(k, a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128_private simde_mm_maskz_range_ss_r_ = simde__m128_to_private(a); \ - \ - if (k & 1) \ - simde_mm_maskz_range_ss_r_ = simde__m128_to_private(simde_x_mm_range_ss(a, b, imm8)); \ - else \ - simde_mm_maskz_range_ss_r_.f32[0] = SIMDE_FLOAT32_C(0.0); \ - \ - simde__m128_from_private(simde_mm_maskz_range_ss_r_); \ - })) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde_mm_maskz_range_ss (simde__mmask8 k, simde__m128 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128_private r_ = simde__m128_to_private(a); - - if (k & 1) - r_ = simde__m128_to_private(simde_x_mm_range_ss(a, b, imm8)); - else - r_.f32[0] = SIMDE_FLOAT32_C(0.0); - - return simde__m128_from_private(r_); - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_range_ss - #define _mm_maskz_range_ss(k, a, b, imm8) simde_mm_maskz_range_ss(k, a, b, imm8) -#endif - -#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - #define simde_x_mm_range_sd(a, b, imm8) simde_mm_move_sd(a, simde_mm_range_pd(a, b, imm8)) -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - #define simde_x_mm_range_sd(a, b, imm8) simde_mm_move_sd(a, simde_mm_range_pd(simde_x_mm_broadcastlow_pd(a), 
simde_x_mm_broadcastlow_pd(b), imm8)) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde_x_mm_range_sd (simde__m128d a, simde__m128d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128d_private - r_ = simde__m128d_to_private(a), - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - simde_float64 abs_a = simde_uint64_as_float64(a_.u64[0] & UINT64_C(9223372036854775807)); - simde_float64 abs_b = simde_uint64_as_float64(b_.u64[0] & UINT64_C(9223372036854775807)); - - switch (imm8 & 3) { - case 0: - r_ = simde__m128d_to_private(simde_mm_min_sd(a, b)); - break; - case 1: - r_ = simde__m128d_to_private(simde_mm_max_sd(a, b)); - break; - case 2: - r_.f64[0] = abs_a <= abs_b ? a_.f64[0] : b_.f64[0]; - break; - case 3: - r_.f64[0] = abs_b >= abs_a ? b_.f64[0] : a_.f64[0]; - break; - default: - break; - } - - switch (imm8 & 12) { - case 0: - r_.f64[0] = simde_uint64_as_float64((a_.u64[0] & UINT64_C(9223372036854775808)) ^ (r_.u64[0] & UINT64_C(9223372036854775807))); - break; - case 8: - r_.f64[0] = simde_uint64_as_float64(r_.u64[0] & UINT64_C(9223372036854775807)); - break; - case 12: - r_.f64[0] = simde_uint64_as_float64(r_.u64[0] | UINT64_C(9223372036854775808)); - break; - default: - break; - } - - return simde__m128d_from_private(r_); - } -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm_mask_range_sd(src, k, a, b, imm8) _mm_mask_range_sd(src, k, a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_mask_range_sd(src, k, a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128d_private \ - simde_mm_mask_range_sd_r_ = simde__m128d_to_private(a), \ - simde_mm_mask_range_sd_src_ = simde__m128d_to_private(src); \ - \ - if (k & 1) \ - simde_mm_mask_range_sd_r_ = simde__m128d_to_private(simde_x_mm_range_sd(a, b, imm8)); \ - else \ - simde_mm_mask_range_sd_r_.f64[0] = simde_mm_mask_range_sd_src_.f64[0]; \ - \ - simde__m128d_from_private(simde_mm_mask_range_sd_r_); \ - })) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde_mm_mask_range_sd (simde__m128d src, simde__mmask8 k, simde__m128d a, simde__m128d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128d_private - r_ = simde__m128d_to_private(a), - src_ = simde__m128d_to_private(src); - - if (k & 1) - r_ = simde__m128d_to_private(simde_x_mm_range_sd(a, b, imm8)); - else - r_.f64[0] = src_.f64[0]; - - return simde__m128d_from_private(r_); - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_range_sd - #define _mm_mask_range_sd(src, k, a, b, imm8) simde_mm_mask_range_sd(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm_maskz_range_sd(k, a, b, imm8) _mm_maskz_range_sd(k, a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_maskz_range_sd(k, a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128d_private simde_mm_maskz_range_sd_r_ = simde__m128d_to_private(a); \ - \ - if (k & 1) \ - simde_mm_maskz_range_sd_r_ = simde__m128d_to_private(simde_x_mm_range_sd(a, b, imm8)); \ - else \ - simde_mm_maskz_range_sd_r_.f64[0] = SIMDE_FLOAT64_C(0.0); \ - \ - simde__m128d_from_private(simde_mm_maskz_range_sd_r_); \ - })) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde_mm_maskz_range_sd (simde__mmask8 k, simde__m128d a, simde__m128d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128d_private r_ = simde__m128d_to_private(a); - - if (k & 1) - r_ = simde__m128d_to_private(simde_x_mm_range_sd(a, b, imm8)); - else - r_.f64[0] = SIMDE_FLOAT64_C(0.0); - - 
return simde__m128d_from_private(r_); - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_range_sd - #define _mm_maskz_range_sd(k, a, b, imm8) simde_mm_maskz_range_sd(k, a, b, imm8) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_RANGE_H) */ -/* :: End simde/x86/avx512/range.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/range_round.h :: */ -#if !defined(SIMDE_X86_AVX512_RANGE_ROUND_H) -#define SIMDE_X86_AVX512_RANGE_ROUND_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_range_round_ps(a, b, imm8, sae) _mm512_range_round_ps(a, b, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_range_round_ps(a, b, imm8, sae) simde_mm512_range_ps(a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_range_round_ps(a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512 simde_mm512_range_round_ps_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_range_round_ps_envp; \ - int simde_mm512_range_round_ps_x = feholdexcept(&simde_mm512_range_round_ps_envp); \ - simde_mm512_range_round_ps_r = simde_mm512_range_ps(a, b, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_range_round_ps_x == 0)) \ - fesetenv(&simde_mm512_range_round_ps_envp); \ - } \ - else { \ - simde_mm512_range_round_ps_r = simde_mm512_range_ps(a, b, imm8); \ - } \ - \ - simde_mm512_range_round_ps_r; \ - })) - #else - #define simde_mm512_range_round_ps(a, b, imm8, sae) simde_mm512_range_ps(a, b, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512 - simde_mm512_range_round_ps (simde__m512 a, simde__m512 b, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m512 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_range_ps(a, b, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_range_ps(a, b, imm8); - #endif - } - else { - r = simde_mm512_range_ps(a, b, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_range_round_ps - #define _mm512_range_round_ps(a, b, imm8, sae) simde_mm512_range_round_ps(a, b, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_mask_range_round_ps(src, k, a, b, imm8, sae) _mm512_mask_range_round_ps(src, k, a, b, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_mask_range_round_ps(src, k, a, b, imm8, sae) simde_mm512_mask_range_ps(src, k, a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_mask_range_round_ps(src, k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512 simde_mm512_mask_range_round_ps_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_mask_range_round_ps_envp; \ - int simde_mm512_mask_range_round_ps_x = feholdexcept(&simde_mm512_mask_range_round_ps_envp); \ - simde_mm512_mask_range_round_ps_r = simde_mm512_mask_range_ps(src, k, a, b, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_mask_range_round_ps_x == 0)) \ - 
fesetenv(&simde_mm512_mask_range_round_ps_envp); \ - } \ - else { \ - simde_mm512_mask_range_round_ps_r = simde_mm512_mask_range_ps(src, k, a, b, imm8); \ - } \ - \ - simde_mm512_mask_range_round_ps_r; \ - })) - #else - #define simde_mm512_mask_range_round_ps(src, k, a, b, imm8, sae) simde_mm512_mask_range_ps(src, k, a, b, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512 - simde_mm512_mask_range_round_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m512 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_mask_range_ps(src, k, a, b, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_mask_range_ps(src, k, a, b, imm8); - #endif - } - else { - r = simde_mm512_mask_range_ps(src, k, a, b, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_range_round_ps - #define _mm512_mask_range_round_ps(src, k, a, b, imm8, sae) simde_mm512_mask_range_round_ps(src, k, a, b, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_maskz_range_round_ps(k, a, b, imm8, sae) _mm512_maskz_range_round_ps(k, a, b, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_maskz_range_round_ps(k, a, b, imm8, sae) simde_mm512_maskz_range_ps(k, a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_maskz_range_round_ps(k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512 simde_mm512_maskz_range_round_ps_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_maskz_range_round_ps_envp; \ - int simde_mm512_maskz_range_round_ps_x = feholdexcept(&simde_mm512_maskz_range_round_ps_envp); \ - simde_mm512_maskz_range_round_ps_r = simde_mm512_maskz_range_ps(k, a, b, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_maskz_range_round_ps_x == 0)) \ - fesetenv(&simde_mm512_maskz_range_round_ps_envp); \ - } \ - else { \ - simde_mm512_maskz_range_round_ps_r = simde_mm512_maskz_range_ps(k, a, b, imm8); \ - } \ - \ - simde_mm512_maskz_range_round_ps_r; \ - })) - #else - #define simde_mm512_maskz_range_round_ps(k, a, b, imm8, sae) simde_mm512_maskz_range_ps(k, a, b, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512 - simde_mm512_maskz_range_round_ps (simde__mmask16 k, simde__m512 a, simde__m512 b, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m512 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_maskz_range_ps(k, a, b, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_maskz_range_ps(k, a, b, imm8); - #endif - } - else { - r = simde_mm512_maskz_range_ps(k, a, b, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_range_round_ps - #define _mm512_maskz_range_round_ps(k, a, b, imm8, sae) simde_mm512_maskz_range_round_ps(k, a, b, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_range_round_pd(a, b, imm8, sae) _mm512_range_round_pd(a, b, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_range_round_pd(a, b, imm8, sae) simde_mm512_range_pd(a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - 
#define simde_mm512_range_round_pd(a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d simde_mm512_range_round_pd_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_range_round_pd_envp; \ - int simde_mm512_range_round_pd_x = feholdexcept(&simde_mm512_range_round_pd_envp); \ - simde_mm512_range_round_pd_r = simde_mm512_range_pd(a, b, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_range_round_pd_x == 0)) \ - fesetenv(&simde_mm512_range_round_pd_envp); \ - } \ - else { \ - simde_mm512_range_round_pd_r = simde_mm512_range_pd(a, b, imm8); \ - } \ - \ - simde_mm512_range_round_pd_r; \ - })) - #else - #define simde_mm512_range_round_pd(a, b, imm8, sae) simde_mm512_range_pd(a, b, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512d - simde_mm512_range_round_pd (simde__m512d a, simde__m512d b, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m512d r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_range_pd(a, b, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_range_pd(a, b, imm8); - #endif - } - else { - r = simde_mm512_range_pd(a, b, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_range_round_pd - #define _mm512_range_round_pd(a, b, imm8, sae) simde_mm512_range_round_pd(a, b, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_mask_range_round_pd(src, k, a, b, imm8, sae) _mm512_mask_range_round_pd(src, k, a, b, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_mask_range_round_pd(src, k, a, b, imm8, sae) simde_mm512_mask_range_pd(src, k, a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_mask_range_round_pd(src, k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d simde_mm512_mask_range_round_pd_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_mask_range_round_pd_envp; \ - int simde_mm512_mask_range_round_pd_x = feholdexcept(&simde_mm512_mask_range_round_pd_envp); \ - simde_mm512_mask_range_round_pd_r = simde_mm512_mask_range_pd(src, k, a, b, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_mask_range_round_pd_x == 0)) \ - fesetenv(&simde_mm512_mask_range_round_pd_envp); \ - } \ - else { \ - simde_mm512_mask_range_round_pd_r = simde_mm512_mask_range_pd(src, k, a, b, imm8); \ - } \ - \ - simde_mm512_mask_range_round_pd_r; \ - })) - #else - #define simde_mm512_mask_range_round_pd(src, k, a, b, imm8, sae) simde_mm512_mask_range_pd(src, k, a, b, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512d - simde_mm512_mask_range_round_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m512d r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_mask_range_pd(src, k, a, b, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_mask_range_pd(src, k, a, b, imm8); - #endif - } - else { - r = simde_mm512_mask_range_pd(src, k, a, b, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_range_round_pd - #define _mm512_mask_range_round_pd(src, k, a, b, imm8, sae) simde_mm512_mask_range_round_pd(src, k, a, b, imm8, sae) -#endif 
- -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_maskz_range_round_pd(k, a, b, imm8, sae) _mm512_maskz_range_round_pd(k, a, b, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_maskz_range_round_pd(k, a, b, imm8, sae) simde_mm512_maskz_range_pd(k, a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_maskz_range_round_pd(k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d simde_mm512_maskz_range_round_pd_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_maskz_range_round_pd_envp; \ - int simde_mm512_maskz_range_round_pd_x = feholdexcept(&simde_mm512_maskz_range_round_pd_envp); \ - simde_mm512_maskz_range_round_pd_r = simde_mm512_maskz_range_pd(k, a, b, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_maskz_range_round_pd_x == 0)) \ - fesetenv(&simde_mm512_maskz_range_round_pd_envp); \ - } \ - else { \ - simde_mm512_maskz_range_round_pd_r = simde_mm512_maskz_range_pd(k, a, b, imm8); \ - } \ - \ - simde_mm512_maskz_range_round_pd_r; \ - })) - #else - #define simde_mm512_maskz_range_round_pd(k, a, b, imm8, sae) simde_mm512_maskz_range_pd(k, a, b, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512d - simde_mm512_maskz_range_round_pd (simde__mmask8 k, simde__m512d a, simde__m512d b, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m512d r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_maskz_range_pd(k, a, b, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_maskz_range_pd(k, a, b, imm8); - #endif - } - else { - r = simde_mm512_maskz_range_pd(k, a, b, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_range_round_pd - #define _mm512_maskz_range_round_pd(k, a, b, imm8, sae) simde_mm512_maskz_range_round_pd(k, a, b, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm_range_round_ss(a, b, imm8, sae) _mm_range_round_ss(a, b, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_range_round_ss(a, b, imm8, sae) simde_x_mm_range_ss(a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm_range_round_ss(a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128 simde_mm_range_round_ss_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm_range_round_ss_envp; \ - int simde_mm_range_round_ss_x = feholdexcept(&simde_mm_range_round_ss_envp); \ - simde_mm_range_round_ss_r = simde_x_mm_range_ss(a, b, imm8); \ - if (HEDLEY_LIKELY(simde_mm_range_round_ss_x == 0)) \ - fesetenv(&simde_mm_range_round_ss_envp); \ - } \ - else { \ - simde_mm_range_round_ss_r = simde_x_mm_range_ss(a, b, imm8); \ - } \ - \ - simde_mm_range_round_ss_r; \ - })) - #else - #define simde_mm_range_round_ss(a, b, imm8, sae) simde_x_mm_range_ss(a, b, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde_mm_range_round_ss (simde__m128 a, simde__m128 b, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m128 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_x_mm_range_ss(a, b, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_x_mm_range_ss(a, b, imm8); - #endif - } - else { - r = simde_x_mm_range_ss(a, b, imm8); - } 
- - return r; - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_range_round_ss - #define _mm_range_round_ss(a, b, imm8, sae) simde_mm_range_round_ss(a, b, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm_mask_range_round_ss(src, k, a, b, imm8, sae) _mm_mask_range_round_ss(src, k, a, b, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_mask_range_round_ss(src, k, a, b, imm8, sae) simde_mm_mask_range_ss(src, k, a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm_mask_range_round_ss(src, k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128 simde_mm_mask_range_round_ss_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm_mask_range_round_ss_envp; \ - int simde_mm_mask_range_round_ss_x = feholdexcept(&simde_mm_mask_range_round_ss_envp); \ - simde_mm_mask_range_round_ss_r = simde_mm_mask_range_ss(src, k, a, b, imm8); \ - if (HEDLEY_LIKELY(simde_mm_mask_range_round_ss_x == 0)) \ - fesetenv(&simde_mm_mask_range_round_ss_envp); \ - } \ - else { \ - simde_mm_mask_range_round_ss_r = simde_mm_mask_range_ss(src, k, a, b, imm8); \ - } \ - \ - simde_mm_mask_range_round_ss_r; \ - })) - #else - #define simde_mm_mask_range_round_ss(src, k, a, b, imm8, sae) simde_mm_mask_range_ss(src, k, a, b, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde_mm_mask_range_round_ss (simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m128 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm_mask_range_ss(src, k, a, b, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm_mask_range_ss(src, k, a, b, imm8); - #endif - } - else { - r = simde_mm_mask_range_ss(src, k, a, b, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_range_round_ss - #define _mm_mask_range_round_ss(src, k, a, b, imm8, sae) simde_mm_mask_range_round_ss(src, k, a, b, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm_maskz_range_round_ss(k, a, b, imm8, sae) _mm_maskz_range_round_ss(k, a, b, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_maskz_range_round_ss(k, a, b, imm8, sae) simde_mm_maskz_range_ss(k, a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm_maskz_range_round_ss(k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128 simde_mm_maskz_range_round_ss_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm_maskz_range_round_ss_envp; \ - int simde_mm_maskz_range_round_ss_x = feholdexcept(&simde_mm_maskz_range_round_ss_envp); \ - simde_mm_maskz_range_round_ss_r = simde_mm_maskz_range_ss(k, a, b, imm8); \ - if (HEDLEY_LIKELY(simde_mm_maskz_range_round_ss_x == 0)) \ - fesetenv(&simde_mm_maskz_range_round_ss_envp); \ - } \ - else { \ - simde_mm_maskz_range_round_ss_r = simde_mm_maskz_range_ss(k, a, b, imm8); \ - } \ - \ - simde_mm_maskz_range_round_ss_r; \ - })) - #else - #define simde_mm_maskz_range_round_ss(k, a, b, imm8, sae) simde_mm_maskz_range_ss(k, a, b, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde_mm_maskz_range_round_ss (simde__mmask8 k, simde__m128 a, simde__m128 b, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) 
{ - simde__m128 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm_maskz_range_ss(k, a, b, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm_maskz_range_ss(k, a, b, imm8); - #endif - } - else { - r = simde_mm_maskz_range_ss(k, a, b, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_range_round_ss - #define _mm_maskz_range_round_ss(k, a, b, imm8, sae) simde_mm_maskz_range_round_ss(k, a, b, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm_range_round_sd(a, b, imm8, sae) _mm_range_round_sd(a, b, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_range_round_sd(a, b, imm8, sae) simde_x_mm_range_sd(a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm_range_round_sd(a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128d simde_mm_range_round_sd_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm_range_round_sd_envp; \ - int simde_mm_range_round_sd_x = feholdexcept(&simde_mm_range_round_sd_envp); \ - simde_mm_range_round_sd_r = simde_x_mm_range_sd(a, b, imm8); \ - if (HEDLEY_LIKELY(simde_mm_range_round_sd_x == 0)) \ - fesetenv(&simde_mm_range_round_sd_envp); \ - } \ - else { \ - simde_mm_range_round_sd_r = simde_x_mm_range_sd(a, b, imm8); \ - } \ - \ - simde_mm_range_round_sd_r; \ - })) - #else - #define simde_mm_range_round_sd(a, b, imm8, sae) simde_x_mm_range_sd(a, b, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde_mm_range_round_sd (simde__m128d a, simde__m128d b, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m128d r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_x_mm_range_sd(a, b, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_x_mm_range_sd(a, b, imm8); - #endif - } - else { - r = simde_x_mm_range_sd(a, b, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_range_round_sd - #define _mm_range_round_sd(a, b, imm8, sae) simde_mm_range_round_sd(a, b, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm_mask_range_round_sd(src, k, a, b, imm8, sae) _mm_mask_range_round_sd(src, k, a, b, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_mask_range_round_sd(src, k, a, b, imm8, sae) simde_mm_mask_range_sd(src, k, a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm_mask_range_round_sd(src, k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128d simde_mm_mask_range_round_sd_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm_mask_range_round_sd_envp; \ - int simde_mm_mask_range_round_sd_x = feholdexcept(&simde_mm_mask_range_round_sd_envp); \ - simde_mm_mask_range_round_sd_r = simde_mm_mask_range_sd(src, k, a, b, imm8); \ - if (HEDLEY_LIKELY(simde_mm_mask_range_round_sd_x == 0)) \ - fesetenv(&simde_mm_mask_range_round_sd_envp); \ - } \ - else { \ - simde_mm_mask_range_round_sd_r = simde_mm_mask_range_sd(src, k, a, b, imm8); \ - } \ - \ - simde_mm_mask_range_round_sd_r; \ - })) - #else - #define simde_mm_mask_range_round_sd(src, k, a, b, imm8, sae) simde_mm_mask_range_sd(src, k, a, b, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - 
simde_mm_mask_range_round_sd (simde__m128d src, simde__mmask8 k, simde__m128d a, simde__m128d b, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m128d r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm_mask_range_sd(src, k, a, b, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm_mask_range_sd(src, k, a, b, imm8); - #endif - } - else { - r = simde_mm_mask_range_sd(src, k, a, b, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_range_round_sd - #define _mm_mask_range_round_sd(src, k, a, b, imm8, sae) simde_mm_mask_range_round_sd(src, k, a, b, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm_maskz_range_round_sd(k, a, b, imm8, sae) _mm_maskz_range_round_sd(k, a, b, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_maskz_range_round_sd(k, a, b, imm8, sae) simde_mm_maskz_range_sd(k, a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm_maskz_range_round_sd(k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m128d simde_mm_maskz_range_round_sd_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm_maskz_range_round_sd_envp; \ - int simde_mm_maskz_range_round_sd_x = feholdexcept(&simde_mm_maskz_range_round_sd_envp); \ - simde_mm_maskz_range_round_sd_r = simde_mm_maskz_range_sd(k, a, b, imm8); \ - if (HEDLEY_LIKELY(simde_mm_maskz_range_round_sd_x == 0)) \ - fesetenv(&simde_mm_maskz_range_round_sd_envp); \ - } \ - else { \ - simde_mm_maskz_range_round_sd_r = simde_mm_maskz_range_sd(k, a, b, imm8); \ - } \ - \ - simde_mm_maskz_range_round_sd_r; \ - })) - #else - #define simde_mm_maskz_range_round_sd(k, a, b, imm8, sae) simde_mm_maskz_range_sd(k, a, b, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde_mm_maskz_range_round_sd (simde__mmask8 k, simde__m128d a, simde__m128d b, int imm8, int sae) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) - SIMDE_REQUIRE_CONSTANT(sae) { - simde__m128d r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm_maskz_range_sd(k, a, b, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm_maskz_range_sd(k, a, b, imm8); - #endif - } - else { - r = simde_mm_maskz_range_sd(k, a, b, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_range_round_sd - #define _mm_maskz_range_round_sd(k, a, b, imm8, sae) simde_mm_maskz_range_round_sd(k, a, b, imm8, sae) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_RANGE_ROUND_H) */ -/* :: End simde/x86/avx512/range_round.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/rcp.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright 
notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Michael R. Crusoe - */ - -#if !defined(SIMDE_X86_AVX512_RCP_H) -#define SIMDE_X86_AVX512_RCP_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -// TODO: "The maximum relative error for this approximation is less than 2^-14." -// vs 1.5*2^-12 for _mm{,256}_rcp_ps - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_rcp14_ps (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rcp14_ps(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; - } - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_rcp14_ps - #define _mm512_rcp14_ps(a) simde_mm512_rcp14_ps(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_RCP_H) */ -/* :: End simde/x86/avx512/rcp.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/reduce.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2023 Michael R. 
Crusoe - */ - -#if !defined(SIMDE_X86_AVX512_REDUCE_H) -#define SIMDE_X86_AVX512_REDUCE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(__clang__) && SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 -SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16 -simde_mm512_reduce_max_ph(simde__m512h a) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_reduce_max_ph(a); - #else - simde__m512h_private a_; - simde_float16 r; - a_ = simde__m512h_to_private(a); - - r = SIMDE_NINFINITYHF; - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_VECTORIZE_REDUCTION(max:r) - #endif - for (size_t i = 0 ; i < (sizeof(a_.f16) / sizeof(a_.f16[0])) ; i++) { - r = simde_float16_to_float32(a_.f16[i]) > simde_float16_to_float32(r) ? a_.f16[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_max_ph(a) simde_mm512_reduce_max_ph((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16 -simde_mm512_reduce_min_ph(simde__m512h a) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_reduce_min_ph(a); - #else - simde__m512h_private a_; - simde_float16 r; - a_ = simde__m512h_to_private(a); - - r = SIMDE_INFINITYHF; - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_VECTORIZE_REDUCTION(min:r) - #endif - for (size_t i = 0 ; i < (sizeof(a_.f16) / sizeof(a_.f16[0])) ; i++) { - r = simde_float16_to_float32(a_.f16[i]) < simde_float16_to_float32(r) ? a_.f16[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_min_ph(a) simde_mm512_reduce_min_ph((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm512_reduce_max_epi32(simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_reduce_max_epi32(a); - #else - simde__m512i_private a_; - int32_t r; - a_ = simde__m512i_to_private(a); - - r = -INT32_MAX; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r = a_.i32[i] > r ? a_.i32[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_max_epi32(a) simde_mm512_reduce_max_epi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm512_reduce_max_epi64(simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_reduce_max_epi64(a); - #else - simde__m512i_private a_; - int64_t r; - a_ = simde__m512i_to_private(a); - - r = -INT64_MAX; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r = a_.i64[i] > r ? a_.i64[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_max_epi64(a) simde_mm512_reduce_max_epi64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm512_reduce_max_epu32(simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_reduce_max_epu32(a); - #else - simde__m512i_private a_; - uint32_t r; - a_ = simde__m512i_to_private(a); - - r = 0; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r = a_.u32[i] > r ? 
a_.u32[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_max_epu32(a) simde_mm512_reduce_max_epu32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_mm512_reduce_max_epu64(simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_reduce_max_epu64(a); - #else - simde__m512i_private a_; - uint64_t r; - a_ = simde__m512i_to_private(a); - - r = 0; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r = a_.u64[i] > r ? a_.u64[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_max_epu64(a) simde_mm512_reduce_max_epu64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm512_reduce_max_pd(simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_reduce_max_pd(a); - #else - simde__m512d_private a_; - simde_float64 r; - a_ = simde__m512d_to_private(a); - - r = -SIMDE_MATH_INFINITY; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r = a_.f64[i] > r ? a_.f64[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_max_pd(a) simde_mm512_reduce_max_pd((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm512_reduce_max_ps(simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_reduce_max_ps(a); - #else - simde__m512_private a_; - simde_float32 r; - a_ = simde__m512_to_private(a); - - r = -SIMDE_MATH_INFINITYF; - SIMDE_VECTORIZE_REDUCTION(max:r) - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r = a_.f32[i] > r ? a_.f32[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_max_ps(a) simde_mm512_reduce_max_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm512_reduce_min_epi32(simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_reduce_min_epi32(a); - #else - simde__m512i_private a_; - int32_t r; - a_ = simde__m512i_to_private(a); - - r = INT32_MAX; - SIMDE_VECTORIZE_REDUCTION(min:r) - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r = a_.i32[i] < r ? a_.i32[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_min_epi32(a) simde_mm512_reduce_min_epi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm512_reduce_min_epi64(simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_reduce_min_epi64(a); - #else - simde__m512i_private a_; - int64_t r; - a_ = simde__m512i_to_private(a); - - r = INT64_MAX; - SIMDE_VECTORIZE_REDUCTION(min:r) - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r = a_.i64[i] < r ? a_.i64[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_min_epi64(a) simde_mm512_reduce_min_epi64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm512_reduce_min_epu32(simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_reduce_min_epu32(a); - #else - simde__m512i_private a_; - uint32_t r; - a_ = simde__m512i_to_private(a); - - r = UINT32_MAX; - SIMDE_VECTORIZE_REDUCTION(min:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r = a_.u32[i] < r ? 
a_.u32[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_min_epu32(a) simde_mm512_reduce_min_epu32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_mm512_reduce_min_epu64(simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_reduce_min_epu64(a); - #else - simde__m512i_private a_; - uint64_t r; - a_ = simde__m512i_to_private(a); - - r = UINT64_MAX; - SIMDE_VECTORIZE_REDUCTION(min:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r = a_.u64[i] < r ? a_.u64[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_min_epu64(a) simde_mm512_reduce_min_epu64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm512_reduce_min_pd(simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_reduce_min_pd(a); - #else - simde__m512d_private a_; - simde_float64 r; - a_ = simde__m512d_to_private(a); - - r = SIMDE_MATH_INFINITY; - SIMDE_VECTORIZE_REDUCTION(min:r) - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r = a_.f64[i] < r ? a_.f64[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_min_pd(a) simde_mm512_reduce_min_pd((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm512_reduce_min_ps(simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_reduce_min_ps(a); - #else - simde__m512_private a_; - simde_float32 r; - a_ = simde__m512_to_private(a); - - r = SIMDE_MATH_INFINITYF; - SIMDE_VECTORIZE_REDUCTION(min:r) - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r = a_.f32[i] < r ? a_.f32[i] : r; - } - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_reduce_min_ps(a) simde_mm512_reduce_min_ps((a)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_REDUCE_H) */ -/* :: End simde/x86/avx512/reduce.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/rol.h :: */ -#if !defined(SIMDE_X86_AVX512_ROL_H) -#define SIMDE_X86_AVX512_ROL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_rol_epi32(a, imm8) _mm_rol_epi32(a, imm8) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128i - simde_mm_rol_epi32 (simde__m128i a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_rl(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - switch (imm8 & 31) { - case 0: - r_ = a_; - break; - default: - r_.u32 = 
(a_.u32 << (imm8 & 31)) | (a_.u32 >> (32 - (imm8 & 31))); - break; - } - #else - switch (imm8 & 31) { - case 0: - r_ = a_; - break; - default: - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] << (imm8 & 31)) | (a_.u32[i] >> (32 - (imm8 & 31))); - } - break; - } - #endif - - return simde__m128i_from_private(r_); - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_rol_epi32 - #define _mm_rol_epi32(a, imm8) simde_mm_rol_epi32(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_mask_rol_epi32(src, k, a, imm8) _mm_mask_rol_epi32(src, k, a, imm8) -#else - #define simde_mm_mask_rol_epi32(src, k, a, imm8) simde_mm_mask_mov_epi32(src, k, simde_mm_rol_epi32(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_rol_epi32 - #define _mm_mask_rol_epi32(src, k, a, imm8) simde_mm_mask_rol_epi32(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_maskz_rol_epi32(k, a, imm8) _mm_maskz_rol_epi32(k, a, imm8) -#else - #define simde_mm_maskz_rol_epi32(k, a, imm8) simde_mm_maskz_mov_epi32(k, simde_mm_rol_epi32(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_rol_epi32 - #define _mm_maskz_rol_epi32(k, a, imm8) simde_mm_maskz_rol_epi32(k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_rol_epi32(a, imm8) _mm256_rol_epi32(a, imm8) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m256i - simde_mm256_rol_epi32 (simde__m256i a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - r_.m128i_private[i].altivec_i32 = vec_rl(a_.m128i_private[i].altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8))); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - switch (imm8 & 31) { - case 0: - r_ = a_; - break; - default: - r_.u32 = (a_.u32 << (imm8 & 31)) | (a_.u32 >> (32 - (imm8 & 31))); - break; - } - #else - switch (imm8 & 31) { - case 0: - r_ = a_; - break; - default: - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] << (imm8 & 31)) | (a_.u32[i] >> (32 - (imm8 & 31))); - } - break; - } - #endif - - return simde__m256i_from_private(r_); - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_rol_epi32 - #define _mm256_rol_epi32(a, imm8) simde_mm256_rol_epi32(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_mask_rol_epi32(src, k, a, imm8) _mm256_mask_rol_epi32(src, k, a, imm8) -#else - #define simde_mm256_mask_rol_epi32(src, k, a, imm8) simde_mm256_mask_mov_epi32(src, k, simde_mm256_rol_epi32(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_rol_epi32 - #define _mm256_mask_rol_epi32(src, k, a, imm8) simde_mm256_mask_rol_epi32(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) 
&& defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_maskz_rol_epi32(k, a, imm8) _mm256_maskz_rol_epi32(k, a, imm8) -#else - #define simde_mm256_maskz_rol_epi32(k, a, imm8) simde_mm256_maskz_mov_epi32(k, simde_mm256_rol_epi32(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_rol_epi32 - #define _mm256_maskz_rol_epi32(k, a, imm8) simde_mm256_maskz_rol_epi32(k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_rol_epi32(a, imm8) _mm512_rol_epi32(a, imm8) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512i - simde_mm512_rol_epi32 (simde__m512i a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - r_.m128i_private[i].altivec_i32 = vec_rl(a_.m128i_private[i].altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8))); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - switch (imm8 & 31) { - case 0: - r_ = a_; - break; - default: - r_.u32 = (a_.u32 << (imm8 & 31)) | (a_.u32 >> (32 - (imm8 & 31))); - break; - } - #else - switch (imm8 & 31) { - case 0: - r_ = a_; - break; - default: - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] << (imm8 & 31)) | (a_.u32[i] >> (32 - (imm8 & 31))); - } - break; - } - #endif - - return simde__m512i_from_private(r_); - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_rol_epi32 - #define _mm512_rol_epi32(a, imm8) simde_mm512_rol_epi32(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_rol_epi32(src, k, a, imm8) _mm512_mask_rol_epi32(src, k, a, imm8) -#else - #define simde_mm512_mask_rol_epi32(src, k, a, imm8) simde_mm512_mask_mov_epi32(src, k, simde_mm512_rol_epi32(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_rol_epi32 - #define _mm512_mask_rol_epi32(src, k, a, imm8) simde_mm512_mask_rol_epi32(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_rol_epi32(k, a, imm8) _mm512_maskz_rol_epi32(k, a, imm8) -#else - #define simde_mm512_maskz_rol_epi32(k, a, imm8) simde_mm512_maskz_mov_epi32(k, simde_mm512_rol_epi32(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_rol_epi32 - #define _mm512_maskz_rol_epi32(k, a, imm8) simde_mm512_maskz_rol_epi32(k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_rol_epi64(a, imm8) _mm_rol_epi64(a, imm8) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128i - simde_mm_rol_epi64 (simde__m128i a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_i64 = vec_rl(a_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, imm8))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - switch (imm8 & 63) { - case 0: - r_ = a_; - break; - default: - r_.u64 = (a_.u64 << (imm8 & 63)) | (a_.u64 >> (64 - (imm8 & 63))); - break; - } - #else - switch (imm8 & 63) { - case 0: - r_ = a_; - break; - default: - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] << (imm8 & 63)) | (a_.u64[i] >> (64 - (imm8 
& 63))); - } - break; - } - #endif - - return simde__m128i_from_private(r_); - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_rol_epi64 - #define _mm_rol_epi64(a, imm8) simde_mm_rol_epi64(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_mask_rol_epi64(src, k, a, imm8) _mm_mask_rol_epi64(src, k, a, imm8) -#else - #define simde_mm_mask_rol_epi64(src, k, a, imm8) simde_mm_mask_mov_epi64(src, k, simde_mm_rol_epi64(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_rol_epi64 - #define _mm_mask_rol_epi64(src, k, a, imm8) simde_mm_mask_rol_epi64(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_maskz_rol_epi64(k, a, imm8) _mm_maskz_rol_epi64(k, a, imm8) -#else - #define simde_mm_maskz_rol_epi64(k, a, imm8) simde_mm_maskz_mov_epi64(k, simde_mm_rol_epi64(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_rol_epi64 - #define _mm_maskz_rol_epi64(k, a, imm8) simde_mm_maskz_rol_epi64(k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_rol_epi64(a, imm8) _mm256_rol_epi64(a, imm8) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m256i - simde_mm256_rol_epi64 (simde__m256i a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { - r_.m128i_private[i].altivec_i64 = vec_rl(a_.m128i_private[i].altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, imm8))); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - switch (imm8 & 63) { - case 0: - r_ = a_; - break; - default: - r_.u64 = (a_.u64 << (imm8 & 63)) | (a_.u64 >> (64 - (imm8 & 63))); - break; - } - #else - switch (imm8 & 63) { - case 0: - r_ = a_; - break; - default: - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] << (imm8 & 63)) | (a_.u64[i] >> (64 - (imm8 & 63))); - } - break; - } - #endif - - return simde__m256i_from_private(r_); - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_rol_epi64 - #define _mm256_rol_epi64(a, imm8) simde_mm256_rol_epi64(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_mask_rol_epi64(src, k, a, imm8) _mm256_mask_rol_epi64(src, k, a, imm8) -#else - #define simde_mm256_mask_rol_epi64(src, k, a, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm256_rol_epi64(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_rol_epi64 - #define _mm256_mask_rol_epi64(src, k, a, imm8) simde_mm256_mask_rol_epi64(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_maskz_rol_epi64(k, a, imm8) _mm256_maskz_rol_epi64(k, a, imm8) -#else - #define simde_mm256_maskz_rol_epi64(k, a, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm256_rol_epi64(a, imm8)) -#endif -#if 
[Deletion-only hunks continue: vendored, auto-generated SIMDE AVX-512 polyfill headers removed along with the SIMDE dependency — the tail of simde/x86/avx512/rol.h, then rolv.h (with its inlined srlv.h, sllv.h and sub.h), ror.h, and the start of rorv.h. These are the rotate/shift/subtract intrinsic fallbacks, together with their per-file MIT license text and repeated "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY" markers. No project-specific code is deleted in this range.]
-SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_rorv_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_rorv_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_rorv_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_rorv_epi64 - #define _mm512_mask_rorv_epi64(src, k, a, b) simde_mm512_mask_rorv_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_rorv_epi64 (simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_rorv_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_rorv_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_rorv_epi64 - #define _mm512_maskz_rorv_epi64(k, a, b) simde_mm512_maskz_rorv_epi64(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_RORV_H) */ -/* :: End simde/x86/avx512/rorv.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/round.h :: */ -#if !defined(SIMDE_X86_AVX512_ROUND_H) -#define SIMDE_X86_AVX512_ROUND_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_x_mm512_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512_private \ - simde_x_mm512_round_ps_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ - simde_x_mm512_round_ps_a_ = simde__m512_to_private(a); \ - \ - for (size_t simde_x_mm512_round_ps_i = 0 ; simde_x_mm512_round_ps_i < (sizeof(simde_x_mm512_round_ps_r_.m256) / sizeof(simde_x_mm512_round_ps_r_.m256[0])) ; simde_x_mm512_round_ps_i++) { \ - simde_x_mm512_round_ps_r_.m256[simde_x_mm512_round_ps_i] = simde_mm256_round_ps(simde_x_mm512_round_ps_a_.m256[simde_x_mm512_round_ps_i], rounding); \ - } \ - \ - simde__m512_from_private(simde_x_mm512_round_ps_r_); \ - })) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512 - simde_x_mm512_round_ps (simde__m512 a, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. 
*/ - #if \ - defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.m128_private[i].altivec_f32)); - } - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].neon_f32 = vrndiq_f32(a_.m128_private[i].neon_f32); - } - #elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_ps()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.m128_private[i].altivec_f32)); - } - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].neon_f32 = vrndnq_f32(a_.m128_private[i].neon_f32); - } - #elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_ps()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.m128_private[i].altivec_f32)); - } - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].neon_f32 = vrndmq_f32(a_.m128_private[i].neon_f32); - } - #elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_ps()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.m128_private[i].altivec_f32)); - } - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].neon_f32 = vrndpq_f32(a_.m128_private[i].neon_f32); - } - #elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #else - 
HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_ps()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.m128_private[i].altivec_f32)); - } - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].neon_f32 = vrndq_f32(a_.m128_private[i].neon_f32); - } - #elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_ps()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_ps()); - } - - return simde__m512_from_private(r_); - } -#endif - -#if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_x_mm512_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d_private \ - simde_x_mm512_round_pd_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ - simde_x_mm512_round_pd_a_ = simde__m512d_to_private(a); \ - \ - for (size_t simde_x_mm512_round_pd_i = 0 ; simde_x_mm512_round_pd_i < (sizeof(simde_x_mm512_round_pd_r_.m256d) / sizeof(simde_x_mm512_round_pd_r_.m256d[0])) ; simde_x_mm512_round_pd_i++) { \ - simde_x_mm512_round_pd_r_.m256d[simde_x_mm512_round_pd_i] = simde_mm256_round_pd(simde_x_mm512_round_pd_a_.m256d[simde_x_mm512_round_pd_i], rounding); \ - } \ - \ - simde__m512d_from_private(simde_x_mm512_round_pd_r_); \ - })) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512d - simde_x_mm512_round_pd (simde__m512d a, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - /* For architectures which lack a current direction SIMD instruction. 
*/ - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.m128d_private[i].altivec_f64)); - } - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].neon_f64 = vrndiq_f64(a_.m128d_private[i].neon_f64); - } - #elif defined(simde_math_nearbyint) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_nearbyint(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.m128d_private[i].altivec_f64)); - } - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].neon_f64 = vrndaq_f64(a_.m128d_private[i].neon_f64); - } - #elif defined(simde_math_roundeven) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_roundeven(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_floor(a_.m128d_private[i].altivec_f64)); - } - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].neon_f64 = vrndmq_f64(a_.m128d_private[i].neon_f64); - } - #elif defined(simde_math_floor) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_floor(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.m128d_private[i].altivec_f64)); - } - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].neon_f64 = vrndpq_f64(a_.m128d_private[i].neon_f64); - } - #elif defined(simde_math_ceil) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_ceil(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_pd()); - 
#endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.m128d_private[i].altivec_f64)); - } - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].neon_f64 = vrndq_f64(a_.m128d_private[i].neon_f64); - } - #elif defined(simde_math_trunc) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_trunc(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_pd()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_pd()); - } - - return simde__m512d_from_private(r_); - } -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_ROUND_H) */ -/* :: End simde/x86/avx512/round.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/roundscale.h :: */ -#if !defined(SIMDE_X86_AVX512_ROUNDSCALE_H) -#define SIMDE_X86_AVX512_ROUNDSCALE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm_roundscale_ps(a, imm8) _mm_roundscale_ps((a), (imm8)) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde_mm_roundscale_ps_internal_ (simde__m128 result, simde__m128 a, int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - HEDLEY_STATIC_CAST(void, imm8); - - simde__m128 r, clear_sign; - - clear_sign = simde_mm_andnot_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.0)), result); - r = simde_x_mm_select_ps(result, a, simde_mm_cmpeq_ps(clear_sign, simde_mm_set1_ps(SIMDE_MATH_INFINITYF))); - - return r; - } - #define simde_mm_roundscale_ps(a, imm8) \ - simde_mm_roundscale_ps_internal_( \ - simde_mm_mul_ps( \ - simde_mm_round_ps( \ - simde_mm_mul_ps( \ - a, \ - simde_mm_set1_ps(simde_math_exp2f(((imm8 >> 4) & 15)))), \ - ((imm8) & 15) \ - ), \ - simde_mm_set1_ps(simde_math_exp2f(-((imm8 >> 4) & 15))) \ - ), \ - (a), \ - (imm8) \ - ) -#endif -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_roundscale_ps - #define _mm_roundscale_ps(a, imm8) simde_mm_roundscale_ps(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_mask_roundscale_ps(src, k, a, imm8) _mm_mask_roundscale_ps(src, k, a, imm8) -#else - #define simde_mm_mask_roundscale_ps(src, k, a, imm8) simde_mm_mask_mov_ps(src, k, simde_mm_roundscale_ps(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_roundscale_ps - #define _mm_mask_roundscale_ps(src, k, a, imm8) simde_mm_mask_roundscale_ps(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_maskz_roundscale_ps(k, a, imm8) _mm_maskz_roundscale_ps(k, a, imm8) -#else - #define simde_mm_maskz_roundscale_ps(k, a, imm8) simde_mm_maskz_mov_ps(k, simde_mm_roundscale_ps(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_roundscale_ps - #define _mm_maskz_roundscale_ps(k, a, imm8) simde_mm_maskz_roundscale_ps(k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm256_roundscale_ps(a, imm8) _mm256_roundscale_ps((a), (imm8)) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm256_roundscale_ps(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256_private \ - simde_mm256_roundscale_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ - simde_mm256_roundscale_ps_a_ = simde__m256_to_private(a); \ - \ - for (size_t simde_mm256_roundscale_ps_i = 0 ; simde_mm256_roundscale_ps_i < (sizeof(simde_mm256_roundscale_ps_r_.m128) / sizeof(simde_mm256_roundscale_ps_r_.m128[0])) ; simde_mm256_roundscale_ps_i++) { \ - simde_mm256_roundscale_ps_r_.m128[simde_mm256_roundscale_ps_i] = simde_mm_roundscale_ps(simde_mm256_roundscale_ps_a_.m128[simde_mm256_roundscale_ps_i], imm8); \ - } \ - \ - simde__m256_from_private(simde_mm256_roundscale_ps_r_); \ - })) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m256 - simde_mm256_roundscale_ps_internal_ (simde__m256 result, simde__m256 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - HEDLEY_STATIC_CAST(void, imm8); - - simde__m256 r, clear_sign; - - clear_sign = simde_mm256_andnot_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0)), result); - r = simde_x_mm256_select_ps(result, a, simde_mm256_castsi256_ps(simde_mm256_cmpeq_epi32(simde_mm256_castps_si256(clear_sign), simde_mm256_castps_si256(simde_mm256_set1_ps(SIMDE_MATH_INFINITYF))))); - - return r; - } - #define simde_mm256_roundscale_ps(a, imm8) \ - simde_mm256_roundscale_ps_internal_( \ - simde_mm256_mul_ps( \ - simde_mm256_round_ps( \ - simde_mm256_mul_ps( \ - a, \ - simde_mm256_set1_ps(simde_math_exp2f(((imm8 >> 4) & 15)))), \ - ((imm8) & 15) \ - ), \ - simde_mm256_set1_ps(simde_math_exp2f(-((imm8 >> 4) & 15))) \ - ), \ - (a), \ - (imm8) \ - ) -#endif -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_roundscale_ps - #define _mm256_roundscale_ps(a, imm8) simde_mm256_roundscale_ps(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_mask_roundscale_ps(src, k, a, imm8) _mm256_mask_roundscale_ps(src, k, a, imm8) -#else - #define simde_mm256_mask_roundscale_ps(src, k, a, imm8) simde_mm256_mask_mov_ps(src, k, simde_mm256_roundscale_ps(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_roundscale_ps - #define _mm256_mask_roundscale_ps(src, k, a, imm8) simde_mm256_mask_roundscale_ps(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_maskz_roundscale_ps(k, a, imm8) _mm256_maskz_roundscale_ps(k, a, imm8) -#else - #define 
simde_mm256_maskz_roundscale_ps(k, a, imm8) simde_mm256_maskz_mov_ps(k, simde_mm256_roundscale_ps(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_roundscale_ps - #define _mm256_maskz_roundscale_ps(k, a, imm8) simde_mm256_maskz_roundscale_ps(k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_roundscale_ps(a, imm8) _mm512_roundscale_ps((a), (imm8)) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_roundscale_ps(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512_private \ - simde_mm512_roundscale_ps_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ - simde_mm512_roundscale_ps_a_ = simde__m512_to_private(a); \ - \ - for (size_t simde_mm512_roundscale_ps_i = 0 ; simde_mm512_roundscale_ps_i < (sizeof(simde_mm512_roundscale_ps_r_.m256) / sizeof(simde_mm512_roundscale_ps_r_.m256[0])) ; simde_mm512_roundscale_ps_i++) { \ - simde_mm512_roundscale_ps_r_.m256[simde_mm512_roundscale_ps_i] = simde_mm256_roundscale_ps(simde_mm512_roundscale_ps_a_.m256[simde_mm512_roundscale_ps_i], imm8); \ - } \ - \ - simde__m512_from_private(simde_mm512_roundscale_ps_r_); \ - })) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512 - simde_mm512_roundscale_ps_internal_ (simde__m512 result, simde__m512 a, int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - HEDLEY_STATIC_CAST(void, imm8); - - simde__m512 r, clear_sign; - - clear_sign = simde_mm512_andnot_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.0)), result); - r = simde_mm512_mask_mov_ps(result, simde_mm512_cmpeq_epi32_mask(simde_mm512_castps_si512(clear_sign), simde_mm512_castps_si512(simde_mm512_set1_ps(SIMDE_MATH_INFINITYF))), a); - - return r; - } - #define simde_mm512_roundscale_ps(a, imm8) \ - simde_mm512_roundscale_ps_internal_( \ - simde_mm512_mul_ps( \ - simde_x_mm512_round_ps( \ - simde_mm512_mul_ps( \ - a, \ - simde_mm512_set1_ps(simde_math_exp2f(((imm8 >> 4) & 15)))), \ - ((imm8) & 15) \ - ), \ - simde_mm512_set1_ps(simde_math_exp2f(-((imm8 >> 4) & 15))) \ - ), \ - (a), \ - (imm8) \ - ) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_roundscale_ps - #define _mm512_roundscale_ps(a, imm8) simde_mm512_roundscale_ps(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_roundscale_ps(src, k, a, imm8) _mm512_mask_roundscale_ps(src, k, a, imm8) -#else - #define simde_mm512_mask_roundscale_ps(src, k, a, imm8) simde_mm512_mask_mov_ps(src, k, simde_mm512_roundscale_ps(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_roundscale_ps - #define _mm512_mask_roundscale_ps(src, k, a, imm8) simde_mm512_mask_roundscale_ps(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_roundscale_ps(k, a, imm8) _mm512_maskz_roundscale_ps(k, a, imm8) -#else - #define simde_mm512_maskz_roundscale_ps(k, a, imm8) simde_mm512_maskz_mov_ps(k, simde_mm512_roundscale_ps(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_roundscale_ps - #define _mm512_maskz_roundscale_ps(k, a, imm8) simde_mm512_maskz_roundscale_ps(k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm_roundscale_pd(a, imm8) _mm_roundscale_pd((a), (imm8)) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde_mm_roundscale_pd_internal_ (simde__m128d result, simde__m128d a, int imm8) - 
SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - HEDLEY_STATIC_CAST(void, imm8); - - simde__m128d r, clear_sign; - - clear_sign = simde_mm_andnot_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.0)), result); - r = simde_x_mm_select_pd(result, a, simde_mm_cmpeq_pd(clear_sign, simde_mm_set1_pd(SIMDE_MATH_INFINITY))); - - return r; - } - #define simde_mm_roundscale_pd(a, imm8) \ - simde_mm_roundscale_pd_internal_( \ - simde_mm_mul_pd( \ - simde_mm_round_pd( \ - simde_mm_mul_pd( \ - a, \ - simde_mm_set1_pd(simde_math_exp2(((imm8 >> 4) & 15)))), \ - ((imm8) & 15) \ - ), \ - simde_mm_set1_pd(simde_math_exp2(-((imm8 >> 4) & 15))) \ - ), \ - (a), \ - (imm8) \ - ) -#endif -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_roundscale_pd - #define _mm_roundscale_pd(a, imm8) simde_mm_roundscale_pd(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_mask_roundscale_pd(src, k, a, imm8) _mm_mask_roundscale_pd(src, k, a, imm8) -#else - #define simde_mm_mask_roundscale_pd(src, k, a, imm8) simde_mm_mask_mov_pd(src, k, simde_mm_roundscale_pd(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_roundscale_pd - #define _mm_mask_roundscale_pd(src, k, a, imm8) simde_mm_mask_roundscale_pd(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_maskz_roundscale_pd(k, a, imm8) _mm_maskz_roundscale_pd(k, a, imm8) -#else - #define simde_mm_maskz_roundscale_pd(k, a, imm8) simde_mm_maskz_mov_pd(k, simde_mm_roundscale_pd(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_roundscale_pd - #define _mm_maskz_roundscale_pd(k, a, imm8) simde_mm_maskz_roundscale_pd(k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm256_roundscale_pd(a, imm8) _mm256_roundscale_pd((a), (imm8)) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm256_roundscale_pd(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256d_private \ - simde_mm256_roundscale_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ - simde_mm256_roundscale_pd_a_ = simde__m256d_to_private(a); \ - \ - for (size_t simde_mm256_roundscale_pd_i = 0 ; simde_mm256_roundscale_pd_i < (sizeof(simde_mm256_roundscale_pd_r_.m128d) / sizeof(simde_mm256_roundscale_pd_r_.m128d[0])) ; simde_mm256_roundscale_pd_i++) { \ - simde_mm256_roundscale_pd_r_.m128d[simde_mm256_roundscale_pd_i] = simde_mm_roundscale_pd(simde_mm256_roundscale_pd_a_.m128d[simde_mm256_roundscale_pd_i], imm8); \ - } \ - \ - simde__m256d_from_private(simde_mm256_roundscale_pd_r_); \ - })) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m256d - simde_mm256_roundscale_pd_internal_ (simde__m256d result, simde__m256d a, int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - HEDLEY_STATIC_CAST(void, imm8); - - simde__m256d r, clear_sign; - - clear_sign = simde_mm256_andnot_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.0)), result); - r = simde_x_mm256_select_pd(result, a, simde_mm256_castsi256_pd(simde_mm256_cmpeq_epi64(simde_mm256_castpd_si256(clear_sign), simde_mm256_castpd_si256(simde_mm256_set1_pd(SIMDE_MATH_INFINITY))))); - - return r; - } - #define simde_mm256_roundscale_pd(a, imm8) \ - simde_mm256_roundscale_pd_internal_( \ - simde_mm256_mul_pd( \ - simde_mm256_round_pd( 
\ - simde_mm256_mul_pd( \ - a, \ - simde_mm256_set1_pd(simde_math_exp2(((imm8 >> 4) & 15)))), \ - ((imm8) & 15) \ - ), \ - simde_mm256_set1_pd(simde_math_exp2(-((imm8 >> 4) & 15))) \ - ), \ - (a), \ - (imm8) \ - ) -#endif -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_roundscale_pd - #define _mm256_roundscale_pd(a, imm8) simde_mm256_roundscale_pd(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_mask_roundscale_pd(src, k, a, imm8) _mm256_mask_roundscale_pd(src, k, a, imm8) -#else - #define simde_mm256_mask_roundscale_pd(src, k, a, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm256_roundscale_pd(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_roundscale_pd - #define _mm256_mask_roundscale_pd(src, k, a, imm8) simde_mm256_mask_roundscale_pd(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_maskz_roundscale_pd(k, a, imm8) _mm256_maskz_roundscale_pd(k, a, imm8) -#else - #define simde_mm256_maskz_roundscale_pd(k, a, imm8) simde_mm256_maskz_mov_pd(k, simde_mm256_roundscale_pd(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_roundscale_pd - #define _mm256_maskz_roundscale_pd(k, a, imm8) simde_mm256_maskz_roundscale_pd(k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_roundscale_pd(a, imm8) _mm512_roundscale_pd((a), (imm8)) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_roundscale_pd(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d_private \ - simde_mm512_roundscale_pd_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ - simde_mm512_roundscale_pd_a_ = simde__m512d_to_private(a); \ - \ - for (size_t simde_mm512_roundscale_pd_i = 0 ; simde_mm512_roundscale_pd_i < (sizeof(simde_mm512_roundscale_pd_r_.m256d) / sizeof(simde_mm512_roundscale_pd_r_.m256d[0])) ; simde_mm512_roundscale_pd_i++) { \ - simde_mm512_roundscale_pd_r_.m256d[simde_mm512_roundscale_pd_i] = simde_mm256_roundscale_pd(simde_mm512_roundscale_pd_a_.m256d[simde_mm512_roundscale_pd_i], imm8); \ - } \ - \ - simde__m512d_from_private(simde_mm512_roundscale_pd_r_); \ - })) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512d - simde_mm512_roundscale_pd_internal_ (simde__m512d result, simde__m512d a, int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - HEDLEY_STATIC_CAST(void, imm8); - - simde__m512d r, clear_sign; - - clear_sign = simde_mm512_andnot_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.0)), result); - r = simde_mm512_mask_mov_pd(result, simde_mm512_cmpeq_epi64_mask(simde_mm512_castpd_si512(clear_sign), simde_mm512_castpd_si512(simde_mm512_set1_pd(SIMDE_MATH_INFINITY))), a); - - return r; - } - #define simde_mm512_roundscale_pd(a, imm8) \ - simde_mm512_roundscale_pd_internal_( \ - simde_mm512_mul_pd( \ - simde_x_mm512_round_pd( \ - simde_mm512_mul_pd( \ - a, \ - simde_mm512_set1_pd(simde_math_exp2(((imm8 >> 4) & 15)))), \ - ((imm8) & 15) \ - ), \ - simde_mm512_set1_pd(simde_math_exp2(-((imm8 >> 4) & 15))) \ - ), \ - (a), \ - (imm8) \ - ) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_roundscale_pd - #define _mm512_roundscale_pd(a, imm8) simde_mm512_roundscale_pd(a, imm8) -#endif - -#if 
defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_roundscale_pd(src, k, a, imm8) _mm512_mask_roundscale_pd(src, k, a, imm8) -#else - #define simde_mm512_mask_roundscale_pd(src, k, a, imm8) simde_mm512_mask_mov_pd(src, k, simde_mm512_roundscale_pd(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_roundscale_pd - #define _mm512_mask_roundscale_pd(src, k, a, imm8) simde_mm512_mask_roundscale_pd(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_roundscale_pd(k, a, imm8) _mm512_maskz_roundscale_pd(k, a, imm8) -#else - #define simde_mm512_maskz_roundscale_pd(k, a, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_roundscale_pd(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_roundscale_pd - #define _mm512_maskz_roundscale_pd(k, a, imm8) simde_mm512_maskz_roundscale_pd(k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm_roundscale_ss(a, b, imm8) _mm_roundscale_ss((a), (b), (imm8)) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde_mm_roundscale_ss_internal_ (simde__m128 result, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - HEDLEY_STATIC_CAST(void, imm8); - - simde__m128_private - r_ = simde__m128_to_private(result), - b_ = simde__m128_to_private(b); - - if(simde_math_isinff(r_.f32[0])) - r_.f32[0] = b_.f32[0]; - - return simde__m128_from_private(r_); - } - #define simde_mm_roundscale_ss(a, b, imm8) \ - simde_mm_roundscale_ss_internal_( \ - simde_mm_mul_ss( \ - simde_mm_round_ss( \ - a, \ - simde_mm_mul_ss( \ - b, \ - simde_mm_set1_ps(simde_math_exp2f(((imm8 >> 4) & 15)))), \ - ((imm8) & 15) \ - ), \ - simde_mm_set1_ps(simde_math_exp2f(-((imm8 >> 4) & 15))) \ - ), \ - (b), \ - (imm8) \ - ) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_roundscale_ss - #define _mm_roundscale_ss(a, b, imm8) simde_mm_roundscale_ss(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) - #define simde_mm_mask_roundscale_ss(src, k, a, b, imm8) _mm_mask_roundscale_ss((src), (k), (a), (b), (imm8)) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde_mm_mask_roundscale_ss_internal_ (simde__m128 a, simde__m128 b, simde__mmask8 k) { - simde__m128 r; - - if(k & 1) - r = a; - else - r = b; - - return r; - } - #define simde_mm_mask_roundscale_ss(src, k, a, b, imm8) \ - simde_mm_mask_roundscale_ss_internal_( \ - simde_mm_roundscale_ss( \ - a, \ - b, \ - imm8 \ - ), \ - simde_mm_move_ss( \ - (a), \ - (src) \ - ), \ - (k) \ - ) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_roundscale_ss - #define _mm_mask_roundscale_ss(src, k, a, b, imm8) simde_mm_mask_roundscale_ss(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) - #define simde_mm_maskz_roundscale_ss(k, a, b, imm8) _mm_maskz_roundscale_ss((k), (a), (b), (imm8)) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde_mm_maskz_roundscale_ss_internal_ (simde__m128 a, simde__m128 b, simde__mmask8 k) { - simde__m128 r; - - if(k & 1) - r = a; - else - r = b; - - return r; - } - #define simde_mm_maskz_roundscale_ss(k, a, b, imm8) \ - simde_mm_maskz_roundscale_ss_internal_( \ - simde_mm_roundscale_ss( \ - a, \ - b, \ - imm8 \ - ), \ - simde_mm_move_ss( \ - (a), \ - simde_mm_setzero_ps() \ - ), \ - (k) \ - ) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_roundscale_ss - #define 
_mm_maskz_roundscale_ss(k, a, b, imm8) simde_mm_maskz_roundscale_ss(k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm_roundscale_sd(a, b, imm8) _mm_roundscale_sd((a), (b), (imm8)) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde_mm_roundscale_sd_internal_ (simde__m128d result, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - HEDLEY_STATIC_CAST(void, imm8); - - simde__m128d_private - r_ = simde__m128d_to_private(result), - b_ = simde__m128d_to_private(b); - - if(simde_math_isinf(r_.f64[0])) - r_.f64[0] = b_.f64[0]; - - return simde__m128d_from_private(r_); - } - #define simde_mm_roundscale_sd(a, b, imm8) \ - simde_mm_roundscale_sd_internal_( \ - simde_mm_mul_sd( \ - simde_mm_round_sd( \ - a, \ - simde_mm_mul_sd( \ - b, \ - simde_mm_set1_pd(simde_math_exp2(((imm8 >> 4) & 15)))), \ - ((imm8) & 15) \ - ), \ - simde_mm_set1_pd(simde_math_exp2(-((imm8 >> 4) & 15))) \ - ), \ - (b), \ - (imm8) \ - ) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_roundscale_sd - #define _mm_roundscale_sd(a, b, imm8) simde_mm_roundscale_sd(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) - #define simde_mm_mask_roundscale_sd(src, k, a, b, imm8) _mm_mask_roundscale_sd((src), (k), (a), (b), (imm8)) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde_mm_mask_roundscale_sd_internal_ (simde__m128d a, simde__m128d b, simde__mmask8 k) { - simde__m128d r; - - if(k & 1) - r = a; - else - r = b; - - return r; - } - #define simde_mm_mask_roundscale_sd(src, k, a, b, imm8) \ - simde_mm_mask_roundscale_sd_internal_( \ - simde_mm_roundscale_sd( \ - a, \ - b, \ - imm8 \ - ), \ - simde_mm_move_sd( \ - (a), \ - (src) \ - ), \ - (k) \ - ) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_roundscale_sd - #define _mm_mask_roundscale_sd(src, k, a, b, imm8) simde_mm_mask_roundscale_sd(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) - #define simde_mm_maskz_roundscale_sd(k, a, b, imm8) _mm_maskz_roundscale_sd((k), (a), (b), (imm8)) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde_mm_maskz_roundscale_sd_internal_ (simde__m128d a, simde__m128d b, simde__mmask8 k) { - simde__m128d r; - - if(k & 1) - r = a; - else - r = b; - - return r; - } - #define simde_mm_maskz_roundscale_sd(k, a, b, imm8) \ - simde_mm_maskz_roundscale_sd_internal_( \ - simde_mm_roundscale_sd( \ - a, \ - b, \ - imm8 \ - ), \ - simde_mm_move_sd( \ - (a), \ - simde_mm_setzero_pd() \ - ), \ - (k) \ - ) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_roundscale_sd - #define _mm_maskz_roundscale_sd(k, a, b, imm8) simde_mm_maskz_roundscale_sd(k, a, b, imm8) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_ROUNDSCALE_H) */ -/* :: End simde/x86/avx512/roundscale.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/roundscale_round.h :: */ -#if !defined(SIMDE_X86_AVX512_ROUNDSCALE_ROUND_H) -#define SIMDE_X86_AVX512_ROUNDSCALE_ROUND_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(HEDLEY_MSVC_VERSION) -#pragma warning( push ) -#pragma 
warning( disable : 4244 ) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_roundscale_round_ps(a, imm8, sae) _mm512_roundscale_round_ps(a, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_roundscale_round_ps(a, imm8, sae) simde_mm512_roundscale_ps(a, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_roundscale_round_ps(a,imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512 simde_mm512_roundscale_round_ps_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_roundscale_round_ps_envp; \ - int simde_mm512_roundscale_round_ps_x = feholdexcept(&simde_mm512_roundscale_round_ps_envp); \ - simde_mm512_roundscale_round_ps_r = simde_mm512_roundscale_ps(a, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_roundscale_round_ps_x == 0)) \ - fesetenv(&simde_mm512_roundscale_round_ps_envp); \ - } \ - else { \ - simde_mm512_roundscale_round_ps_r = simde_mm512_roundscale_ps(a, imm8); \ - } \ - \ - simde_mm512_roundscale_round_ps_r; \ - })) - #else - #define simde_mm512_roundscale_round_ps(a, imm8, sae) simde_mm512_roundscale_ps(a, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512 - simde_mm512_roundscale_round_ps (simde__m512 a, int imm8, int sae) - SIMDE_REQUIRE_RANGE(imm8, 0, 15) { - simde__m512 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_roundscale_ps(a, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_roundscale_ps(a, imm8); - #endif - } - else { - r = simde_mm512_roundscale_ps(a, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_roundscale_round_ps - #define _mm512_roundscale_round_ps(a, imm8, sae) simde_mm512_roundscale_round_ps(a, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) - #define simde_mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) _mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) simde_mm512_mask_roundscale_ps(src, k, a, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512 simde_mm512_mask_roundscale_round_ps_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_mask_roundscale_round_ps_envp; \ - int simde_mm512_mask_roundscale_round_ps_x = feholdexcept(&simde_mm512_mask_roundscale_round_ps_envp); \ - simde_mm512_mask_roundscale_round_ps_r = simde_mm512_mask_roundscale_ps(src, k, a, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_mask_roundscale_round_ps_x == 0)) \ - fesetenv(&simde_mm512_mask_roundscale_round_ps_envp); \ - } \ - else { \ - simde_mm512_mask_roundscale_round_ps_r = simde_mm512_mask_roundscale_ps(src, k, a, imm8); \ - } \ - \ - simde_mm512_mask_roundscale_round_ps_r; \ - })) - #else - #define simde_mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) simde_mm512_mask_roundscale_ps(src, k, a, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512 - simde_mm512_mask_roundscale_round_ps (simde__m512 src, simde__mmask8 k, simde__m512 a, int imm8, int sae) - SIMDE_REQUIRE_RANGE(imm8, 0, 15) { - simde__m512 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_mask_roundscale_ps(src, k, a, imm8); - 
if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_mask_roundscale_ps(src, k, a, imm8); - #endif - } - else { - r = simde_mm512_mask_roundscale_ps(src, k, a, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_roundscale_round_ps - #define _mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) simde_mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) - #define simde_mm512_maskz_roundscale_round_ps(k, a, imm8, sae) _mm512_maskz_roundscale_round_ps(k, a, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_maskz_roundscale_round_ps(k, a, imm8, sae) simde_mm512_maskz_roundscale_ps(k, a, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_maskz_roundscale_round_ps(k, a, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512 simde_mm512_maskz_roundscale_round_ps_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_maskz_roundscale_round_ps_envp; \ - int simde_mm512_maskz_roundscale_round_ps_x = feholdexcept(&simde_mm512_maskz_roundscale_round_ps_envp); \ - simde_mm512_maskz_roundscale_round_ps_r = simde_mm512_maskz_roundscale_ps(k, a, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_maskz_roundscale_round_ps_x == 0)) \ - fesetenv(&simde_mm512_maskz_roundscale_round_ps_envp); \ - } \ - else { \ - simde_mm512_maskz_roundscale_round_ps_r = simde_mm512_maskz_roundscale_ps(k, a, imm8); \ - } \ - \ - simde_mm512_maskz_roundscale_round_ps_r; \ - })) - #else - #define simde_mm512_maskz_roundscale_round_ps(src, k, a, imm8, sae) simde_mm512_maskz_roundscale_ps(k, a, imm8) - #endif -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512 - simde_mm512_maskz_roundscale_round_ps (simde__mmask8 k, simde__m512 a, int imm8, int sae) - SIMDE_REQUIRE_RANGE(imm8, 0, 15) { - simde__m512 r; - - if (sae & SIMDE_MM_FROUND_NO_EXC) { - #if defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = simde_mm512_maskz_roundscale_ps(k, a, imm8); - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = simde_mm512_maskz_roundscale_ps(k, a, imm8); - #endif - } - else { - r = simde_mm512_maskz_roundscale_ps(k, a, imm8); - } - - return r; - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_roundscale_round_ps - #define _mm512_maskz_roundscale_round_ps(k, a, imm8, sae) simde_mm512_maskz_roundscale_round_ps(k, a, imm8, sae) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_roundscale_round_pd(a, imm8, sae) _mm512_roundscale_round_pd(a, imm8, sae) -#elif defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm512_roundscale_round_pd(a, imm8, sae) simde_mm512_roundscale_pd(a, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) - #if defined(SIMDE_HAVE_FENV_H) - #define simde_mm512_roundscale_round_pd(a, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d simde_mm512_roundscale_round_pd_r; \ - \ - if (sae & SIMDE_MM_FROUND_NO_EXC) { \ - fenv_t simde_mm512_roundscale_round_pd_envp; \ - int simde_mm512_roundscale_round_pd_x = feholdexcept(&simde_mm512_roundscale_round_pd_envp); \ - simde_mm512_roundscale_round_pd_r = simde_mm512_roundscale_pd(a, imm8); \ - if (HEDLEY_LIKELY(simde_mm512_roundscale_round_pd_x == 0)) \ - fesetenv(&simde_mm512_roundscale_round_pd_envp); \ - } \ - else { \ - simde_mm512_roundscale_round_pd_r = simde_mm512_roundscale_pd(a, imm8); \ - } \ - \ - simde_mm512_roundscale_round_pd_r; \ - })) - #else - #define 
[Deletion of the vendored SIMDe AVX-512 amalgam header continues here; every line in this hunk is removed along with the SIMDe dependency, none is modified. The elided block deletes, verbatim:
  - the remaining roundscale_round.h helpers (simde_mm512_roundscale_round_pd and simde_mm_roundscale_round_ss/_sd, with their mask/maskz variants and the fenv-based SIMDE_MM_FROUND_NO_EXC handling), plus their native-alias #defines;
  - sad.h (simde_mm512_sad_epu8 and its AVX512BW alias);
  - scalef.h and the start of svml.h, including the bundled xorsign.h (a SIMDe extension that transfers a sign bit with an XOR instead of branching and multiplying by +/-1), sqrt.h (simde_mm512_sqrt_ps/pd and masked forms), and simde-complex.h (portable complex typedefs and SIMDE_MATH_CMPLX/CMPLXF macros for C, C++, and MSVC);
  - the scalar-fallback SVML math functions acos, acosh, asin, asinh, atan, atan2, and the beginning of atanh, each for __m128/__m256/__m512 in float and double with their _mm512_mask_* wrappers, dispatching to native SVML, Sleef, or per-lane libm calls.
The repeated MIT license headers and "AUTOMATICALLY GENERATED FILE" stamps interleaved throughout are likewise deleted. The deletion continues below.]
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_atanh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_atanhf4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_atanhf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_atanh_ps - #define _mm_atanh_ps(a) simde_mm_atanh_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_atanh_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_atanh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_atanhd2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_atanh(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_atanh_pd - #define _mm_atanh_pd(a) simde_mm_atanh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_atanh_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_atanh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_atanhf8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_atanh_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_atanhf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_atanh_ps - #define _mm256_atanh_ps(a) simde_mm256_atanh_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_atanh_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_atanh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_atanhd4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_atanh_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_atanh(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_atanh_pd - #define _mm256_atanh_pd(a) simde_mm256_atanh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_atanh_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_atanh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_atanhf16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_atanh_ps(a_.m256[i]); - } - #else - 
SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_atanhf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_atanh_ps - #define _mm512_atanh_ps(a) simde_mm512_atanh_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_atanh_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_atanh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_atanhd8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_atanh_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_atanh(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_atanh_pd - #define _mm512_atanh_pd(a) simde_mm512_atanh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_atanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_atanh_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_atanh_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_atanh_ps - #define _mm512_mask_atanh_ps(src, k, a) simde_mm512_mask_atanh_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_atanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_atanh_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_atanh_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_atanh_pd - #define _mm512_mask_atanh_pd(src, k, a) simde_mm512_mask_atanh_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cbrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cbrt_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_cbrtf4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cbrtf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cbrt_ps - #define _mm_cbrt_ps(a) simde_mm_cbrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cbrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cbrt_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_cbrtd2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cbrt(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cbrt_pd - #define _mm_cbrt_pd(a) simde_mm_cbrt_pd(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_cbrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cbrt_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_cbrtf8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_cbrt_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cbrtf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cbrt_ps - #define _mm256_cbrt_ps(a) simde_mm256_cbrt_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_cbrt_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cbrt_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_cbrtd4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_cbrt_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cbrt(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cbrt_pd - #define _mm256_cbrt_pd(a) simde_mm256_cbrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_cbrt_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cbrt_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_cbrtf16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_cbrt_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cbrtf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cbrt_ps - #define _mm512_cbrt_ps(a) simde_mm512_cbrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_cbrt_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cbrt_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_cbrtd8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_cbrt_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cbrt(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cbrt_pd - #define _mm512_cbrt_pd(a) simde_mm512_cbrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_cbrt_ps(simde__m512 
src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cbrt_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_cbrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cbrt_ps - #define _mm512_mask_cbrt_ps(src, k, a) simde_mm512_mask_cbrt_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_cbrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cbrt_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_cbrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cbrt_pd - #define _mm512_mask_cbrt_pd(src, k, a) simde_mm512_mask_cbrt_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cexp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cexp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { - simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1])); - r_.f32[ i ] = simde_math_crealf(val); - r_.f32[i + 1] = simde_math_cimagf(val); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cexp_ps - #define _mm_cexp_ps(a) simde_mm_cexp_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_cexp_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cexp_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { - simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1])); - r_.f32[ i ] = simde_math_crealf(val); - r_.f32[i + 1] = simde_math_cimagf(val); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cexp_ps - #define _mm256_cexp_ps(a) simde_mm256_cexp_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cos_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cos_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_cosf4_u10(a); - #else - return Sleef_cosf4_u35(a); - #endif - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cosf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cos_ps - #define _mm_cos_ps(a) simde_mm_cos_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cos_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cos_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_cosd2_u10(a); - #else - return Sleef_cosd2_u35(a); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = 
simde_math_cos(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cos_pd - #define _mm_cos_pd(a) simde_mm_cos_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_cos_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cos_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_cosf8_u10(a); - #else - return Sleef_cosf8_u35(a); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_cos_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cosf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cos_ps - #define _mm256_cos_ps(a) simde_mm256_cos_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_cos_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cos_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_cosd4_u10(a); - #else - return Sleef_cosd4_u35(a); - #endif - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_cos_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cos(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cos_pd - #define _mm256_cos_pd(a) simde_mm256_cos_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_cos_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cos_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_cosf16_u10(a); - #else - return Sleef_cosf16_u35(a); - #endif - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_cos_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cosf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cos_ps - #define _mm512_cos_ps(a) simde_mm512_cos_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_cos_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cos_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_cosd8_u10(a); - #else - return Sleef_cosd8_u35(a); - #endif - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < 
(sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_cos_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cos(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cos_pd - #define _mm512_cos_pd(a) simde_mm512_cos_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_cos_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cos_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_cos_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cos_ps - #define _mm512_mask_cos_ps(src, k, a) simde_mm512_mask_cos_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_cos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cos_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_cos_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cos_pd - #define _mm512_mask_cos_pd(src, k, a) simde_mm512_mask_cos_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deg2rad_ps(simde__m128 a) { - #if SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_mm_mul_ps(a, simde_mm_set1_ps(SIMDE_MATH_PI_OVER_180F)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_n_f32(a_.neon_i32, SIMDE_MATH_PI_OVER_180F); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) - r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - const __typeof__(r_.f32) tmp = { SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F }; - r_.f32 = a_.f32 * tmp; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_deg2radf(a_.f32[i]); - } - - #endif - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deg2rad_pd(simde__m128d a) { - #if SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return simde_mm_mul_pd(a, simde_mm_set1_pd(SIMDE_MATH_PI_OVER_180)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_n_f64(a_.neon_i64, SIMDE_MATH_PI_OVER_180); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) - r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; - r_.f64 = a_.f64 * tmp; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_deg2rad(a_.f64[i]); - } - - #endif - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_deg2rad_ps(simde__m256 a) { - #if SIMDE_NATURAL_VECTOR_SIZE_GE(256) - return simde_mm256_mul_ps(a, simde_mm256_set1_ps(SIMDE_MATH_PI_OVER_180F)); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / 
sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_x_mm_deg2rad_ps(a_.m128[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) - r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - const __typeof__(r_.f32) tmp = { - SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, - SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F - }; - r_.f32 = a_.f32 * tmp; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_deg2radf(a_.f32[i]); - } - - #endif - return simde__m256_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_deg2rad_pd(simde__m256d a) { - #if SIMDE_NATURAL_VECTOR_SIZE_GE(256) - return simde_mm256_mul_pd(a, simde_mm256_set1_pd(SIMDE_MATH_PI_OVER_180)); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_x_mm_deg2rad_pd(a_.m128d[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) - r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; - r_.f64 = a_.f64 * tmp; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_deg2rad(a_.f64[i]); - } - - #endif - return simde__m256d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_x_mm512_deg2rad_ps(simde__m512 a) { - #if SIMDE_NATURAL_VECTOR_SIZE_GE(512) - return simde_mm512_mul_ps(a, simde_mm512_set1_ps(SIMDE_MATH_PI_OVER_180F)); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_x_mm256_deg2rad_ps(a_.m256[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) - r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - const __typeof__(r_.f32) tmp = { - SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, - SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, - SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, - SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F - }; - r_.f32 = a_.f32 * tmp; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_deg2radf(a_.f32[i]); - } - - #endif - return simde__m512_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_x_mm512_deg2rad_pd(simde__m512d a) { - #if SIMDE_NATURAL_VECTOR_SIZE_GE(512) - return simde_mm512_mul_pd(a, simde_mm512_set1_pd(SIMDE_MATH_PI_OVER_180)); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_x_mm256_deg2rad_pd(a_.m256d[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) - r_.f64 
= a_.f64 * SIMDE_MATH_PI_OVER_180; - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - const __typeof__(r_.f64) tmp = { - SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, - SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 - }; - r_.f64 = a_.f64 * tmp; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_deg2rad(a_.f64[i]); - } - - #endif - return simde__m512d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cosd_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cosd_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_cosf4_u10(simde_x_mm_deg2rad_ps(a)); - #else - return Sleef_cosf4_u35(simde_x_mm_deg2rad_ps(a)); - #endif - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i])); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cosd_ps - #define _mm_cosd_ps(a) simde_mm_cosd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cosd_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cosd_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_cosd2_u10(simde_x_mm_deg2rad_pd(a)); - #else - return Sleef_cosd2_u35(simde_x_mm_deg2rad_pd(a)); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i])); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cosd_pd - #define _mm_cosd_pd(a) simde_mm_cosd_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_cosd_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cosd_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_cosf8_u10(simde_x_mm256_deg2rad_ps(a)); - #else - return Sleef_cosf8_u35(simde_x_mm256_deg2rad_ps(a)); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_cosd_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i])); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cosd_ps - #define _mm256_cosd_ps(a) simde_mm256_cosd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_cosd_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cosd_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_cosd4_u10(simde_x_mm256_deg2rad_pd(a)); - #else - return 
Sleef_cosd4_u35(simde_x_mm256_deg2rad_pd(a)); - #endif - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_cosd_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i])); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cosd_pd - #define _mm256_cosd_pd(a) simde_mm256_cosd_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_cosd_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cosd_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_cosf16_u10(simde_x_mm512_deg2rad_ps(a)); - #else - return Sleef_cosf16_u35(simde_x_mm512_deg2rad_ps(a)); - #endif - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_cosd_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i])); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cosd_ps - #define _mm512_cosd_ps(a) simde_mm512_cosd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_cosd_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cosd_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_cosd8_u10(simde_x_mm512_deg2rad_pd(a)); - #else - return Sleef_cosd8_u35(simde_x_mm512_deg2rad_pd(a)); - #endif - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_cosd_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i])); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cosd_pd - #define _mm512_cosd_pd(a) simde_mm512_cosd_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_cosd_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cosd_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosd_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cosd_ps - #define _mm512_mask_cosd_ps(src, k, a) simde_mm512_mask_cosd_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_cosd_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cosd_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosd_pd(a)); - #endif -} -#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cosd_pd - #define _mm512_mask_cosd_pd(src, k, a) simde_mm512_mask_cosd_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cosh_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cosh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_coshf4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_coshf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cosh_ps - #define _mm_cosh_ps(a) simde_mm_cosh_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cosh_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cosh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_coshd2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cosh(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cosh_pd - #define _mm_cosh_pd(a) simde_mm_cosh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_cosh_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cosh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_coshf8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_cosh_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_coshf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cosh_ps - #define _mm256_cosh_ps(a) simde_mm256_cosh_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_cosh_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cosh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_coshd4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_cosh_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cosh(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cosh_pd - #define _mm256_cosh_pd(a) simde_mm256_cosh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_cosh_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cosh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_coshf16_u10(a); - #else - simde__m512_private - r_, - a_ = 
simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_cosh_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_coshf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cosh_ps - #define _mm512_cosh_ps(a) simde_mm512_cosh_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_cosh_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cosh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_coshd8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_cosh_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cosh(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cosh_pd - #define _mm512_cosh_pd(a) simde_mm512_cosh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_cosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cosh_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosh_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cosh_ps - #define _mm512_mask_cosh_ps(src, k, a) simde_mm512_mask_cosh_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_cosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cosh_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosh_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cosh_pd - #define _mm512_mask_cosh_pd(src, k, a) simde_mm512_mask_cosh_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_div_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 / b_.i8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x4_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] / b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_div_epi8 - #define _mm_div_epi8(a, b) simde_mm_div_epi8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_div_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - 
r_.i16 = a_.i16 / b_.i16; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x4_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] / b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_div_epi16 - #define _mm_div_epi16(a, b) simde_mm_div_epi16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_div_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 / b_.i32; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] / b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b) -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_div_epi32 - #define _mm_div_epi32(a, b) simde_mm_div_epi32(a, b) - #undef _mm_idiv_epi32 - #define _mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_div_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 / b_.i64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x4_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] / b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_div_epi64 - #define _mm_div_epi64(a, b) simde_mm_div_epi64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_div_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u8 = a_.u8 / b_.u8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a_.u8[i] / b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_div_epu8 - #define _mm_div_epu8(a, b) simde_mm_div_epu8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_div_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = a_.u16 / b_.u16; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x16_div(a_.wasm_v128, b_.wasm_v128); 
- #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] / b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_div_epu16 - #define _mm_div_epu16(a, b) simde_mm_div_epu16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_div_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 / b_.u32; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x16_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] / b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b) -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_div_epu32 - #define _mm_div_epu32(a, b) simde_mm_div_epu32(a, b) - #undef _mm_udiv_epi32 - #define _mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_div_epu64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_epu64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 / b_.u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x16_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] / b_.u64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_div_epu64 - #define _mm_div_epu64(a, b) simde_mm_div_epu64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_div_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 / b_.i8; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_div_epi8(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] / b_.i8[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_epi8 - #define _mm256_div_epi8(a, b) simde_mm256_div_epi8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_div_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 / b_.i16; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; 
i++) { - r_.m128i[i] = simde_mm_div_epi16(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] / b_.i16[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_epi16 - #define _mm256_div_epi16(a, b) simde_mm256_div_epi16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_div_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 / b_.i32; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_div_epi32(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] / b_.i32[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#define simde_mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b) -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_epi32 - #define _mm256_div_epi32(a, b) simde_mm256_div_epi32(a, b) - #undef _mm256_idiv_epi32 - #define _mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_div_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 / b_.i64; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_div_epi64(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] / b_.i64[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_epi64 - #define _mm256_div_epi64(a, b) simde_mm256_div_epi64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_div_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u8 = a_.u8 / b_.u8; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_div_epu8(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a_.u8[i] / b_.u8[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_epu8 - #define _mm256_div_epu8(a, b) simde_mm256_div_epu8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_div_epu16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return 
_mm256_div_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = a_.u16 / b_.u16; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_div_epu16(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] / b_.u16[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_epu16 - #define _mm256_div_epu16(a, b) simde_mm256_div_epu16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_div_epu32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_epu32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 / b_.u32; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_div_epu32(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] / b_.u32[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#define simde_mm256_udiv_epi32(a, b) simde_mm256_div_epu32(a, b) -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_epu32 - #define _mm256_div_epu32(a, b) simde_mm256_div_epu32(a, b) - #undef _mm256_udiv_epi32 - #define _mm256_udiv_epi32(a, b) simde_mm256_div_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_div_epu64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_epu64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 / b_.u64; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_div_epu64(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] / b_.u64[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_epu64 - #define _mm256_div_epu64(a, b) simde_mm256_div_epu64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_div_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_div_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 / b_.i8; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_div_epi8(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] / b_.i8[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} 
-#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_div_epi8 - #define _mm512_div_epi8(a, b) simde_mm512_div_epi8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_div_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_div_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 / b_.i16; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_div_epi16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] / b_.i16[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_div_epi16 - #define _mm512_div_epi16(a, b) simde_mm512_div_epi16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_div_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_div_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 / b_.i32; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_div_epi32(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] / b_.i32[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_div_epi32 - #define _mm512_div_epi32(a, b) simde_mm512_div_epi32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_div_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_div_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_div_epi32 - #define _mm512_mask_div_epi32(src, k, a, b) simde_mm512_mask_div_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_div_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_div_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 / b_.i64; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_div_epi64(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] / b_.i64[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_div_epi64 - #define _mm512_div_epi64(a, b) simde_mm512_div_epi64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i 
-simde_mm512_div_epu8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_div_epu8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u8 = a_.u8 / b_.u8; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_div_epu8(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a_.u8[i] / b_.u8[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_div_epu8 - #define _mm512_div_epu8(a, b) simde_mm512_div_epu8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_div_epu16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_div_epu16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = a_.u16 / b_.u16; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_div_epu16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] / b_.u16[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_div_epu16 - #define _mm512_div_epu16(a, b) simde_mm512_div_epu16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_div_epu32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_div_epu32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 / b_.u32; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_div_epu32(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] / b_.u32[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_div_epu32 - #define _mm512_div_epu32(a, b) simde_mm512_div_epu32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_div_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_div_epu32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_div_epu32 - #define _mm512_mask_div_epu32(src, k, a, b) simde_mm512_mask_div_epu32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_div_epu64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_div_epu64(a, b); - #else - simde__m512i_private - r_, - a_ = 
simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 / b_.u64; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_div_epu64(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] / b_.u64[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_div_epu64 - #define _mm512_div_epu64(a, b) simde_mm512_div_epu64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_erf_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_erf_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_erff4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_erff(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_erf_ps - #define _mm_erf_ps(a) simde_mm_erf_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_erf_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_erf_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_erfd2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_erf(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_erf_pd - #define _mm_erf_pd(a) simde_mm_erf_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_erf_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_erf_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_erff8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_erf_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_erff(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_erf_ps - #define _mm256_erf_ps(a) simde_mm256_erf_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_erf_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_erf_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_erfd4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_erf_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_erf(a_.f64[i]); - } - #endif - - return 
simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_erf_pd - #define _mm256_erf_pd(a) simde_mm256_erf_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_erf_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_erf_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_erff16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_erf_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_erff(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_erf_ps - #define _mm512_erf_ps(a) simde_mm512_erf_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_erf_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_erf_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_erfd8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_erf_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_erf(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_erf_pd - #define _mm512_erf_pd(a) simde_mm512_erf_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_erf_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_erf_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_erf_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_erf_ps - #define _mm512_mask_erf_ps(src, k, a) simde_mm512_mask_erf_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_erf_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_erf_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_erf_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_erf_pd - #define _mm512_mask_erf_pd(src, k, a) simde_mm512_mask_erf_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_erfc_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_erfc_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_erfcf4_u15(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_erfcf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_erfc_ps - #define _mm_erfc_ps(a) simde_mm_erfc_ps(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_erfc_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_erfc_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_erfcd2_u15(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_erfc(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_erfc_pd - #define _mm_erfc_pd(a) simde_mm_erfc_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_erfc_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_erfc_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_erfcf8_u15(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_erfc_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_erfcf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_erfc_ps - #define _mm256_erfc_ps(a) simde_mm256_erfc_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_erfc_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_erfc_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_erfcd4_u15(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_erfc_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_erfc(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_erfc_pd - #define _mm256_erfc_pd(a) simde_mm256_erfc_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_erfc_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_erfc_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_erfcf16_u15(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_erfc_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_erfcf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_erfc_ps - #define _mm512_erfc_ps(a) simde_mm512_erfc_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_erfc_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_erfc_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return 
Sleef_erfcd8_u15(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_erfc_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_erfc(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_erfc_pd - #define _mm512_erfc_pd(a) simde_mm512_erfc_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_erfc_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_erfc_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfc_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_erfc_ps - #define _mm512_mask_erfc_ps(src, k, a) simde_mm512_mask_erfc_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_erfc_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_erfc_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfc_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_erfc_pd - #define _mm512_mask_erfc_pd(src, k, a) simde_mm512_mask_erfc_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_exp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_exp_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_expf4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_expf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_exp_ps - #define _mm_exp_ps(a) simde_mm_exp_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_exp_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_exp_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_expd2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_exp_pd - #define _mm_exp_pd(a) simde_mm_exp_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_exp_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_exp_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_expf8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_exp_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_expf(a_.f32[i]); - 
} - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_exp_ps - #define _mm256_exp_ps(a) simde_mm256_exp_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_exp_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_exp_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_expd4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_exp_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_exp_pd - #define _mm256_exp_pd(a) simde_mm256_exp_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_exp_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_exp_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_expf16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_exp_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_expf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_exp_ps - #define _mm512_exp_ps(a) simde_mm512_exp_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_exp_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_exp_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_expd8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_exp_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_exp_pd - #define _mm512_exp_pd(a) simde_mm512_exp_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_exp_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_exp_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_exp_ps - #define _mm512_mask_exp_ps(src, k, a) simde_mm512_mask_exp_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_exp_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_exp_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, 
simde_mm512_exp_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_exp_pd - #define _mm512_mask_exp_pd(src, k, a) simde_mm512_mask_exp_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_expm1_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_expm1_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_expm1f4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_expm1f(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_expm1_ps - #define _mm_expm1_ps(a) simde_mm_expm1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_expm1_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_expm1_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_expm1d2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_expm1(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_expm1_pd - #define _mm_expm1_pd(a) simde_mm_expm1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_expm1_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_expm1_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_expm1f8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_expm1_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_expm1f(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_expm1_ps - #define _mm256_expm1_ps(a) simde_mm256_expm1_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_expm1_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_expm1_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_expm1d4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_expm1_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_expm1(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_expm1_pd - #define _mm256_expm1_pd(a) simde_mm256_expm1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_expm1_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_expm1_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - 
return Sleef_expm1f16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_expm1_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_expm1f(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_expm1_ps - #define _mm512_expm1_ps(a) simde_mm512_expm1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_expm1_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_expm1_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_expm1d8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_expm1_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_expm1(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_expm1_pd - #define _mm512_expm1_pd(a) simde_mm512_expm1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_expm1_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_expm1_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_expm1_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_expm1_ps - #define _mm512_mask_expm1_ps(src, k, a) simde_mm512_mask_expm1_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_expm1_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_expm1_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_expm1_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_expm1_pd - #define _mm512_mask_expm1_pd(src, k, a) simde_mm512_mask_expm1_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_exp2_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_exp2_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_exp2f4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_exp2f(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_exp2_ps - #define _mm_exp2_ps(a) simde_mm_exp2_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_exp2_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_exp2_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_exp2d2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < 
(sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp2(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_exp2_pd - #define _mm_exp2_pd(a) simde_mm_exp2_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_exp2_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_exp2_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_exp2f8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_exp2_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_exp2f(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_exp2_ps - #define _mm256_exp2_ps(a) simde_mm256_exp2_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_exp2_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_exp2_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_exp2d4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_exp2_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp2(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_exp2_pd - #define _mm256_exp2_pd(a) simde_mm256_exp2_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_exp2_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_exp2_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_exp2f16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_exp2_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_exp2f(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_exp2_ps - #define _mm512_exp2_ps(a) simde_mm512_exp2_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_exp2_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_exp2_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_exp2d8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_exp2_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp2(a_.f64[i]); - } - 
#endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_exp2_pd - #define _mm512_exp2_pd(a) simde_mm512_exp2_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_exp2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_exp2_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp2_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_exp2_ps - #define _mm512_mask_exp2_ps(src, k, a) simde_mm512_mask_exp2_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_exp2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_exp2_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp2_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_exp2_pd - #define _mm512_mask_exp2_pd(src, k, a) simde_mm512_mask_exp2_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_exp10_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_exp10_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_exp10f4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_exp10f(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_exp10_ps - #define _mm_exp10_ps(a) simde_mm_exp10_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_exp10_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_exp10_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_exp10d2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp10(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_exp10_pd - #define _mm_exp10_pd(a) simde_mm_exp10_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_exp10_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_exp10_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_exp10f8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_exp10_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_exp10f(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_exp10_ps - #define _mm256_exp10_ps(a) simde_mm256_exp10_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_exp10_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return 
_mm256_exp10_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_exp10d4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_exp10_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp10(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_exp10_pd - #define _mm256_exp10_pd(a) simde_mm256_exp10_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_exp10_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_exp10_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_exp10f16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_exp10_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_exp10f(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_exp10_ps - #define _mm512_exp10_ps(a) simde_mm512_exp10_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_exp10_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_exp10_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_exp10d8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_exp10_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp10(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_exp10_pd - #define _mm512_exp10_pd(a) simde_mm512_exp10_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_exp10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_exp10_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp10_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_exp10_ps - #define _mm512_mask_exp10_ps(src, k, a) simde_mm512_mask_exp10_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_exp10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_exp10_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp10_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_exp10_pd - #define _mm512_mask_exp10_pd(src, k, a) simde_mm512_mask_exp10_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cdfnorm_ps (simde__m128 a) { - #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cdfnorm_ps(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - /* https://www.johndcook.com/blog/cpp_phi/ */ - const simde__m128 a1 = simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); - const simde__m128 a2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); - const simde__m128 a3 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.421413741)); - const simde__m128 a4 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); - const simde__m128 a5 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.061405429)); - const simde__m128 p = simde_mm_set1_ps(SIMDE_FLOAT32_C(0.3275911)); - const simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)); - - /* simde_math_fabsf(x) / sqrtf(2.0) */ - const simde__m128 x = simde_mm_div_ps(simde_x_mm_abs_ps(a), simde_mm_sqrt_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)))); - - /* 1.0 / (1.0 + p * x) */ - const simde__m128 t = simde_mm_div_ps(one, simde_mm_add_ps(one, simde_mm_mul_ps(p, x))); - - /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ - simde__m128 y = simde_mm_mul_ps(a5, t); - y = simde_mm_add_ps(y, a4); - y = simde_mm_mul_ps(y, t); - y = simde_mm_add_ps(y, a3); - y = simde_mm_mul_ps(y, t); - y = simde_mm_add_ps(y, a2); - y = simde_mm_mul_ps(y, t); - y = simde_mm_add_ps(y, a1); - y = simde_mm_mul_ps(y, t); - y = simde_mm_mul_ps(y, simde_mm_exp_ps(simde_mm_mul_ps(x, simde_x_mm_negate_ps(x)))); - y = simde_mm_sub_ps(one, y); - - /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */ - return simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm_add_ps(one, simde_x_mm_xorsign_ps(y, a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cdfnorm_ps - #define _mm_cdfnorm_ps(a) simde_mm_cdfnorm_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cdfnorm_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cdfnorm_pd(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - /* https://www.johndcook.com/blog/cpp_phi/ */ - const simde__m128d a1 = simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); - const simde__m128d a2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); - const simde__m128d a3 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.421413741)); - const simde__m128d a4 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); - const simde__m128d a5 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.061405429)); - const simde__m128d p = simde_mm_set1_pd(SIMDE_FLOAT64_C(0.6475911)); - const simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)); - - /* simde_math_fabs(x) / sqrt(2.0) */ - const simde__m128d x = simde_mm_div_pd(simde_x_mm_abs_pd(a), simde_mm_sqrt_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)))); - - /* 1.0 / (1.0 + p * x) */ - const simde__m128d t = simde_mm_div_pd(one, simde_mm_add_pd(one, simde_mm_mul_pd(p, x))); - - /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ - simde__m128d y = simde_mm_mul_pd(a5, t); - y = simde_mm_add_pd(y, a4); - y = simde_mm_mul_pd(y, t); - y = simde_mm_add_pd(y, a3); - y = simde_mm_mul_pd(y, t); - y = simde_mm_add_pd(y, a2); - y = simde_mm_mul_pd(y, t); - y = simde_mm_add_pd(y, a1); - y = simde_mm_mul_pd(y, t); - y = simde_mm_mul_pd(y, simde_mm_exp_pd(simde_mm_mul_pd(x, simde_x_mm_negate_pd(x)))); - y = simde_mm_sub_pd(one, 
y); - - /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */ - return simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm_add_pd(one, simde_x_mm_xorsign_pd(y, a))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cdfnorm_pd - #define _mm_cdfnorm_pd(a) simde_mm_cdfnorm_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_cdfnorm_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cdfnorm_ps(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - /* https://www.johndcook.com/blog/cpp_phi/ */ - const simde__m256 a1 = simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); - const simde__m256 a2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); - const simde__m256 a3 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.421413741)); - const simde__m256 a4 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); - const simde__m256 a5 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.061405429)); - const simde__m256 p = simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.3275911)); - const simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)); - - /* simde_math_fabsf(x) / sqrtf(2.0) */ - const simde__m256 x = simde_mm256_div_ps(simde_x_mm256_abs_ps(a), simde_mm256_sqrt_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)))); - - /* 1.0 / (1.0 + p * x) */ - const simde__m256 t = simde_mm256_div_ps(one, simde_mm256_add_ps(one, simde_mm256_mul_ps(p, x))); - - /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ - simde__m256 y = simde_mm256_mul_ps(a5, t); - y = simde_mm256_add_ps(y, a4); - y = simde_mm256_mul_ps(y, t); - y = simde_mm256_add_ps(y, a3); - y = simde_mm256_mul_ps(y, t); - y = simde_mm256_add_ps(y, a2); - y = simde_mm256_mul_ps(y, t); - y = simde_mm256_add_ps(y, a1); - y = simde_mm256_mul_ps(y, t); - y = simde_mm256_mul_ps(y, simde_mm256_exp_ps(simde_mm256_mul_ps(x, simde_x_mm256_negate_ps(x)))); - y = simde_mm256_sub_ps(one, y); - - /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ - return simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm256_add_ps(one, simde_x_mm256_xorsign_ps(y, a))); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_cdfnorm_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cdfnorm_ps - #define _mm256_cdfnorm_ps(a) simde_mm256_cdfnorm_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_cdfnorm_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cdfnorm_pd(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - /* https://www.johndcook.com/blog/cpp_phi/ */ - const simde__m256d a1 = simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); - const simde__m256d a2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); - const simde__m256d a3 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.421413741)); - const simde__m256d a4 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); - const simde__m256d a5 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.061405429)); - const simde__m256d p = simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.6475911)); - const simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)); - - /* simde_math_fabs(x) / sqrt(2.0) */ - const simde__m256d x = simde_mm256_div_pd(simde_x_mm256_abs_pd(a), simde_mm256_sqrt_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)))); - - /* 1.0 / (1.0 + p * x) */ - const simde__m256d t = simde_mm256_div_pd(one, simde_mm256_add_pd(one, simde_mm256_mul_pd(p, x))); - - /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ - simde__m256d y = simde_mm256_mul_pd(a5, t); - y = simde_mm256_add_pd(y, a4); - y = simde_mm256_mul_pd(y, t); - y = simde_mm256_add_pd(y, a3); - y = simde_mm256_mul_pd(y, t); - y = simde_mm256_add_pd(y, a2); - y = simde_mm256_mul_pd(y, t); - y = simde_mm256_add_pd(y, a1); - y = simde_mm256_mul_pd(y, t); - y = simde_mm256_mul_pd(y, simde_mm256_exp_pd(simde_mm256_mul_pd(x, simde_x_mm256_negate_pd(x)))); - y = simde_mm256_sub_pd(one, y); - - /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ - return simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm256_add_pd(one, simde_x_mm256_xorsign_pd(y, a))); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_cdfnorm_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cdfnorm_pd - #define _mm256_cdfnorm_pd(a) simde_mm256_cdfnorm_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_cdfnorm_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cdfnorm_ps(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - /* https://www.johndcook.com/blog/cpp_phi/ */ - const simde__m512 a1 = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); - const simde__m512 a2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); - const simde__m512 a3 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.421413741)); - const simde__m512 a4 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); - const simde__m512 a5 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.061405429)); - const simde__m512 p = simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.3275911)); - const simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)); - - /* simde_math_fabsf(x) / sqrtf(2.0) */ - const simde__m512 x = simde_mm512_div_ps(simde_mm512_abs_ps(a), simde_mm512_sqrt_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)))); - - /* 1.0 / (1.0 + p * x) */ - const simde__m512 t = simde_mm512_div_ps(one, simde_mm512_add_ps(one, simde_mm512_mul_ps(p, x))); - - /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ - simde__m512 y = simde_mm512_mul_ps(a5, t); - y = simde_mm512_add_ps(y, a4); - y = simde_mm512_mul_ps(y, t); - y = simde_mm512_add_ps(y, a3); - y = simde_mm512_mul_ps(y, t); - y = simde_mm512_add_ps(y, a2); - y = simde_mm512_mul_ps(y, t); - y = simde_mm512_add_ps(y, a1); - y = simde_mm512_mul_ps(y, t); - y = simde_mm512_mul_ps(y, simde_mm512_exp_ps(simde_mm512_mul_ps(x, simde_x_mm512_negate_ps(x)))); - y = simde_mm512_sub_ps(one, y); - - /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ - return simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm512_add_ps(one, simde_x_mm512_xorsign_ps(y, a))); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_cdfnorm_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cdfnorm_ps - #define _mm512_cdfnorm_ps(a) simde_mm512_cdfnorm_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_cdfnorm_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cdfnorm_pd(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - /* https://www.johndcook.com/blog/cpp_phi/ */ - const simde__m512d a1 = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); - const simde__m512d a2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); - const simde__m512d a3 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.421413741)); - const simde__m512d a4 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); - const simde__m512d a5 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.061405429)); - const simde__m512d p = simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.6475911)); - const simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)); - - /* simde_math_fabs(x) / sqrt(2.0) */ - const simde__m512d x = simde_mm512_div_pd(simde_mm512_abs_pd(a), simde_mm512_sqrt_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)))); - - /* 1.0 / (1.0 + p * x) */ - const simde__m512d t = simde_mm512_div_pd(one, simde_mm512_add_pd(one, simde_mm512_mul_pd(p, x))); - - /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ - simde__m512d y = simde_mm512_mul_pd(a5, t); - y = simde_mm512_add_pd(y, a4); - y = simde_mm512_mul_pd(y, t); - y = simde_mm512_add_pd(y, a3); - y = simde_mm512_mul_pd(y, t); - y = simde_mm512_add_pd(y, a2); - y = simde_mm512_mul_pd(y, t); - y = simde_mm512_add_pd(y, a1); - y = simde_mm512_mul_pd(y, t); - y = simde_mm512_mul_pd(y, simde_mm512_exp_pd(simde_mm512_mul_pd(x, simde_x_mm512_negate_pd(x)))); - y = simde_mm512_sub_pd(one, y); - - /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ - return simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm512_add_pd(one, simde_x_mm512_xorsign_pd(y, a))); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_cdfnorm_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cdfnorm_pd - #define _mm512_cdfnorm_pd(a) simde_mm512_cdfnorm_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_cdfnorm_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cdfnorm_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorm_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cdfnorm_ps - #define _mm512_mask_cdfnorm_ps(src, k, a) simde_mm512_mask_cdfnorm_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_cdfnorm_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cdfnorm_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorm_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cdfnorm_pd - #define _mm512_mask_cdfnorm_pd(src, k, a) simde_mm512_mask_cdfnorm_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_idivrem_epi32 (simde__m128i* mem_addr, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m128i*, mem_addr), a, b); - #else - simde__m128i r; - - r = simde_mm_div_epi32(a, b); - *mem_addr = simde_mm_sub_epi32(a, simde_mm_mullo_epi32(r, b)); - - return r; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_idivrem_epi32 - #define _mm_idivrem_epi32(mem_addr, a, b) simde_mm_idivrem_epi32((mem_addr),(a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_idivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b); - #else - simde__m256i r; - - r = simde_mm256_div_epi32(a, b); - *mem_addr = simde_mm256_sub_epi32(a, simde_mm256_mullo_epi32(r, b)); - - return r; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_idivrem_epi32 - #define _mm256_idivrem_epi32(mem_addr, a, b) simde_mm256_idivrem_epi32((mem_addr),(a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hypot_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_hypot_ps(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_hypotf4_u05(a, b); - #else - return Sleef_hypotf4_u35(a, b); - #endif - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; 
i++) { - r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_hypot_ps - #define _mm_hypot_ps(a, b) simde_mm_hypot_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hypot_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_hypot_pd(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_hypotd2_u05(a, b); - #else - return Sleef_hypotd2_u35(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_hypot_pd - #define _mm_hypot_pd(a, b) simde_mm_hypot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hypot_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hypot_ps(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_hypotf8_u05(a, b); - #else - return Sleef_hypotf8_u35(a, b); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_hypot_ps(a_.m128[i], b_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_hypot_ps - #define _mm256_hypot_ps(a, b) simde_mm256_hypot_ps(a, b) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hypot_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hypot_pd(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_hypotd4_u05(a, b); - #else - return Sleef_hypotd4_u35(a, b); - #endif - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_hypot_pd(a_.m128d[i], b_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_hypot_pd - #define _mm256_hypot_pd(a, b) simde_mm256_hypot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_hypot_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_hypot_ps(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_hypotf16_u05(a, b); - #else - return 
Sleef_hypotf16_u35(a, b); - #endif - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_hypot_ps(a_.m256[i], b_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_hypot_ps - #define _mm512_hypot_ps(a, b) simde_mm512_hypot_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_hypot_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_hypot_pd(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_hypotd8_u05(a, b); - #else - return Sleef_hypotd8_u35(a, b); - #endif - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_hypot_pd(a_.m256d[i], b_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_hypot_pd - #define _mm512_hypot_pd(a, b) simde_mm512_hypot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_hypot_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_hypot_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_hypot_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_hypot_ps - #define _mm512_mask_hypot_ps(src, k, a, b) simde_mm512_mask_hypot_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_hypot_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_hypot_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_hypot_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_hypot_pd - #define _mm512_mask_hypot_pd(src, k, a, b) simde_mm512_mask_hypot_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_invcbrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_invcbrt_ps(a); - #else - return simde_mm_rcp_ps(simde_mm_cbrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_invcbrt_ps - #define _mm_invcbrt_ps(a) simde_mm_invcbrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_invcbrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_invcbrt_pd(a); - #else - return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_cbrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_invcbrt_pd - #define _mm_invcbrt_pd(a) 
simde_mm_invcbrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_invcbrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_invcbrt_ps(a); - #else - return simde_mm256_rcp_ps(simde_mm256_cbrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_invcbrt_ps - #define _mm256_invcbrt_ps(a) simde_mm256_invcbrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_invcbrt_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_invcbrt_pd(a); - #else - return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_cbrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_invcbrt_pd - #define _mm256_invcbrt_pd(a) simde_mm256_invcbrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_invsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_invsqrt_ps(a); - #else - return simde_mm_rcp_ps(simde_mm_sqrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_invsqrt_ps - #define _mm_invsqrt_ps(a) simde_mm_invsqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_invsqrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_invsqrt_pd(a); - #else - return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_sqrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_invsqrt_pd - #define _mm_invsqrt_pd(a) simde_mm_invsqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_invsqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_invsqrt_ps(a); - #else - return simde_mm256_rcp_ps(simde_mm256_sqrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_invsqrt_ps - #define _mm256_invsqrt_ps(a) simde_mm256_invsqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_invsqrt_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_invsqrt_pd(a); - #else - return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_sqrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_invsqrt_pd - #define _mm256_invsqrt_pd(a) simde_mm256_invsqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_invsqrt_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_invsqrt_ps(a); - #else - return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), simde_mm512_sqrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_invsqrt_ps - #define _mm512_invsqrt_ps(a) simde_mm512_invsqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_invsqrt_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_invsqrt_pd(a); - #else - return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm512_sqrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_invsqrt_pd - #define _mm512_invsqrt_pd(a) simde_mm512_invsqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_invsqrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_invsqrt_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_invsqrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_invsqrt_ps - #define _mm512_mask_invsqrt_ps(src, k, a) simde_mm512_mask_invsqrt_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_invsqrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_invsqrt_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_invsqrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_invsqrt_pd - #define _mm512_mask_invsqrt_pd(src, k, a) simde_mm512_mask_invsqrt_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_log_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_log_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_logf4_u10(a); - #else - return Sleef_logf4_u35(a); - #endif - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_logf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_log_ps - #define _mm_log_ps(a) simde_mm_log_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_log_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_log_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_logd2_u10(a); - #else - return Sleef_logd2_u35(a); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_log(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_log_pd - #define _mm_log_pd(a) simde_mm_log_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_log_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_log_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_logf8_u10(a); - #else - return Sleef_logf8_u35(a); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_log_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_logf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_log_ps - #define _mm256_log_ps(a) simde_mm256_log_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_log_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_log_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - 
#if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_logd4_u10(a); - #else - return Sleef_logd4_u35(a); - #endif - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_log_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_log(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_log_pd - #define _mm256_log_pd(a) simde_mm256_log_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_log_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_log_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_logf16_u10(a); - #else - return Sleef_logf16_u35(a); - #endif - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_log_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_logf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_log_ps - #define _mm512_log_ps(a) simde_mm512_log_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_log_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_log_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_logd8_u10(a); - #else - return Sleef_logd8_u35(a); - #endif - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_log_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_log(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_log_pd - #define _mm512_log_pd(a) simde_mm512_log_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_log_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_log_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_log_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_log_ps - #define _mm512_mask_log_ps(src, k, a) simde_mm512_mask_log_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_log_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_log_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_log_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_log_pd - #define _mm512_mask_log_pd(src, k, a) simde_mm512_mask_log_pd(src, k, a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cdfnorminv_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cdfnorminv_ps(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - simde__m128 matched, retval = simde_mm_setzero_ps(); - - { /* if (a < 0 || a > 1) */ - matched = simde_mm_or_ps(simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))), simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)))); - - /* We don't actually need to do anything here since we initialize - * retval to 0.0. */ - } - - { /* else if (a == 0) */ - simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))); - mask = simde_mm_andnot_ps(matched, mask); - matched = simde_mm_or_ps(matched, mask); - - simde__m128 res = simde_mm_set1_ps(-SIMDE_MATH_INFINITYF); - - retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); - } - - { /* else if (a == 1) */ - simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0))); - mask = simde_mm_andnot_ps(matched, mask); - matched = simde_mm_or_ps(matched, mask); - - simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF); - - retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); - } - - { /* Remaining conditions. - * - * Including the else case in this complicates things a lot, but - * we're using cheap operations to get rid of expensive multiply - * and add functions. This should be a small improvement on SSE - * prior to 4.1. On SSE 4.1 we can use _mm_blendv_ps which is - * very fast and this becomes a huge win. NEON, AltiVec, and - * WASM also have blend operations, so this should be a big win - * there, too. */ - - /* else if (a < 0.02425) */ - simde__m128 mask_lo = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.02425))); - /* else if (a > 0.97575) */ - simde__m128 mask_hi = simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.97575))); - - simde__m128 mask = simde_mm_or_ps(mask_lo, mask_hi); - matched = simde_mm_or_ps(matched, mask); - - /* else */ - simde__m128 mask_el = simde_x_mm_not_ps(matched); - mask = simde_mm_or_ps(mask, mask_el); - - /* r = a - 0.5f */ - simde__m128 r = simde_mm_sub_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5))); - - /* lo: q = a - * hi: q = (1.0 - a) */ - simde__m128 q = simde_mm_and_ps(mask_lo, a); - q = simde_mm_or_ps(q, simde_mm_and_ps(mask_hi, simde_mm_sub_ps(simde_mm_set1_ps(1.0f), a))); - - /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ - q = simde_mm_log_ps(q); - q = simde_mm_mul_ps(q, simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.0))); - q = simde_mm_sqrt_ps(q); - - /* el: q = r * r */ - q = simde_x_mm_select_ps(q, simde_mm_mul_ps(r, r), mask_el); - - /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ - /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ - /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ - simde__m128 numerator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el); - numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el)); - numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), 
simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el)); - numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el)); - numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el)); - numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el)); - { - simde__m128 multiplier; - multiplier = simde_mm_and_ps(mask_lo, simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0))); - multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_hi, simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.0)))); - multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_el, r)); - numerator = simde_mm_mul_ps(numerator, multiplier); - } - - /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ - /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ - simde__m128 denominator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el); - denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el)); - denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el)); - denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el)); - denominator = simde_mm_fmadd_ps(denominator, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el), - simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el)); - denominator = simde_mm_fmadd_ps(denominator, q, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0))); - - /* res = numerator / denominator; */ - simde__m128 res = simde_mm_div_ps(numerator, denominator); - - retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); - } - - return retval; - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cdfnorminv_ps - #define _mm_cdfnorminv_ps(a) simde_mm_cdfnorminv_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cdfnorminv_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cdfnorminv_pd(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - simde__m128d matched, retval = simde_mm_setzero_pd(); - - { /* if (a < 0 || a > 1) */ - matched = simde_mm_or_pd(simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))), simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)))); - - /* We don't actually need to do anything here since we initialize - * retval to 0.0. 
*/ - } - - { /* else if (a == 0) */ - simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))); - mask = simde_mm_andnot_pd(matched, mask); - matched = simde_mm_or_pd(matched, mask); - - simde__m128d res = simde_mm_set1_pd(-SIMDE_MATH_INFINITY); - - retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); - } - - { /* else if (a == 1) */ - simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0))); - mask = simde_mm_andnot_pd(matched, mask); - matched = simde_mm_or_pd(matched, mask); - - simde__m128d res = simde_mm_set1_pd(SIMDE_MATH_INFINITY); - - retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); - } - - { /* Remaining conditions. - * - * Including the else case in this complicates things a lot, but - * we're using cheap operations to get rid of expensive multiply - * and add functions. This should be a small improvement on SSE - * prior to 4.1. On SSE 4.1 we can use _mm_blendv_pd which is - * very fast and this becomes a huge win. NEON, AltiVec, and - * WASM also have blend operations, so this should be a big win - * there, too. */ - - /* else if (a < 0.02425) */ - simde__m128d mask_lo = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.02425))); - /* else if (a > 0.97575) */ - simde__m128d mask_hi = simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.97575))); - - simde__m128d mask = simde_mm_or_pd(mask_lo, mask_hi); - matched = simde_mm_or_pd(matched, mask); - - /* else */ - simde__m128d mask_el = simde_x_mm_not_pd(matched); - mask = simde_mm_or_pd(mask, mask_el); - - /* r = a - 0.5 */ - simde__m128d r = simde_mm_sub_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5))); - - /* lo: q = a - * hi: q = (1.0 - a) */ - simde__m128d q = simde_mm_and_pd(mask_lo, a); - q = simde_mm_or_pd(q, simde_mm_and_pd(mask_hi, simde_mm_sub_pd(simde_mm_set1_pd(1.0), a))); - - /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */ - q = simde_mm_log_pd(q); - q = simde_mm_mul_pd(q, simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.0))); - q = simde_mm_sqrt_pd(q); - - /* el: q = r * r */ - q = simde_x_mm_select_pd(q, simde_mm_mul_pd(r, r), mask_el); - - /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0); */ - /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */ - /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ - simde__m128d numerator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el); - numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el)); - numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el)); - numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el)); - numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el)); - numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 
2.938163982698783e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el)); - { - simde__m128d multiplier; - multiplier = simde_mm_and_pd(mask_lo, simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0))); - multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_hi, simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.0)))); - multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_el, r)); - numerator = simde_mm_mul_pd(numerator, multiplier); - } - - /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ - /* el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ - simde__m128d denominator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el); - denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el)); - denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el)); - denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el)); - denominator = simde_mm_fmadd_pd(denominator, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el), - simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el)); - denominator = simde_mm_fmadd_pd(denominator, q, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0))); - - /* res = numerator / denominator; */ - simde__m128d res = simde_mm_div_pd(numerator, denominator); - - retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); - } - - return retval; - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cdfnorminv_pd - #define _mm_cdfnorminv_pd(a) simde_mm_cdfnorminv_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_cdfnorminv_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cdfnorminv_ps(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) - simde__m256 matched, retval = simde_mm256_setzero_ps(); - - { /* if (a < 0 || a > 1) */ - matched = simde_mm256_or_ps(simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ)); - - /* We don't actually need to do anything here since we initialize - * retval to 0.0. 
*/ - } - - { /* else if (a == 0) */ - simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); - mask = simde_mm256_andnot_ps(matched, mask); - matched = simde_mm256_or_ps(matched, mask); - - simde__m256 res = simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF); - - retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); - } - - { /* else if (a == 1) */ - simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_EQ_OQ); - mask = simde_mm256_andnot_ps(matched, mask); - matched = simde_mm256_or_ps(matched, mask); - - simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF); - - retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); - } - - { /* Remaining conditions. - * - * Including the else case in this complicates things a lot, but - * we're using cheap operations to get rid of expensive multiply - * and add functions. This should be a small improvement on SSE - * prior to 4.1. On SSE 4.1 we can use _mm256_blendv_ps which is - * very fast and this becomes a huge win. NEON, AltiVec, and - * WASM also have blend operations, so this should be a big win - * there, too. */ - - /* else if (a < 0.02425) */ - simde__m256 mask_lo = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ); - /* else if (a > 0.97575) */ - simde__m256 mask_hi = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ); - - simde__m256 mask = simde_mm256_or_ps(mask_lo, mask_hi); - matched = simde_mm256_or_ps(matched, mask); - - /* else */ - simde__m256 mask_el = simde_x_mm256_not_ps(matched); - mask = simde_mm256_or_ps(mask, mask_el); - - /* r = a - 0.5f */ - simde__m256 r = simde_mm256_sub_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5))); - - /* lo: q = a - * hi: q = (1.0 - a) */ - simde__m256 q = simde_mm256_and_ps(mask_lo, a); - q = simde_mm256_or_ps(q, simde_mm256_and_ps(mask_hi, simde_mm256_sub_ps(simde_mm256_set1_ps(1.0f), a))); - - /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ - q = simde_mm256_log_ps(q); - q = simde_mm256_mul_ps(q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.0))); - q = simde_mm256_sqrt_ps(q); - - /* el: q = r * r */ - q = simde_x_mm256_select_ps(q, simde_mm256_mul_ps(r, r), mask_el); - - /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ - /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ - /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ - simde__m256 numerator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el); - numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el)); - numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el)); - numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el)); - numerator = simde_mm256_fmadd_ps(numerator, q, 
simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el)); - numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el)); - { - simde__m256 multiplier; - multiplier = simde_mm256_and_ps(mask_lo, simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0))); - multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_hi, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.0)))); - multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_el, r)); - numerator = simde_mm256_mul_ps(numerator, multiplier); - } - - /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ - /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ - simde__m256 denominator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el); - denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el)); - denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el)); - denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el)); - denominator = simde_mm256_fmadd_ps(denominator, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el), - simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el)); - denominator = simde_mm256_fmadd_ps(denominator, q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0))); - - /* res = numerator / denominator; */ - simde__m256 res = simde_mm256_div_ps(numerator, denominator); - - retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); - } - - return retval; - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_cdfnorminv_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cdfnorminv_ps - #define _mm256_cdfnorminv_ps(a) simde_mm256_cdfnorminv_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_cdfnorminv_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cdfnorminv_pd(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) - simde__m256d matched, retval = simde_mm256_setzero_pd(); - - { /* if (a < 0 || a > 1) */ - matched = simde_mm256_or_pd(simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ)); - - /* We don't actually need to do anything here since we 
initialize - * retval to 0.0. */ - } - - { /* else if (a == 0) */ - simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); - mask = simde_mm256_andnot_pd(matched, mask); - matched = simde_mm256_or_pd(matched, mask); - - simde__m256d res = simde_mm256_set1_pd(-SIMDE_MATH_INFINITY); - - retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); - } - - { /* else if (a == 1) */ - simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_EQ_OQ); - mask = simde_mm256_andnot_pd(matched, mask); - matched = simde_mm256_or_pd(matched, mask); - - simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY); - - retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); - } - - { /* Remaining conditions. - * - * Including the else case in this complicates things a lot, but - * we're using cheap operations to get rid of expensive multiply - * and add functions. This should be a small improvement on SSE - * prior to 4.1. On SSE 4.1 we can use _mm256_blendv_pd which is - * very fast and this becomes a huge win. NEON, AltiVec, and - * WASM also have blend operations, so this should be a big win - * there, too. */ - - /* else if (a < 0.02425) */ - simde__m256d mask_lo = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ); - /* else if (a > 0.97575) */ - simde__m256d mask_hi = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ); - - simde__m256d mask = simde_mm256_or_pd(mask_lo, mask_hi); - matched = simde_mm256_or_pd(matched, mask); - - /* else */ - simde__m256d mask_el = simde_x_mm256_not_pd(matched); - mask = simde_mm256_or_pd(mask, mask_el); - - /* r = a - 0.5 */ - simde__m256d r = simde_mm256_sub_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5))); - - /* lo: q = a - * hi: q = (1.0 - a) */ - simde__m256d q = simde_mm256_and_pd(mask_lo, a); - q = simde_mm256_or_pd(q, simde_mm256_and_pd(mask_hi, simde_mm256_sub_pd(simde_mm256_set1_pd(1.0), a))); - - /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */ - q = simde_mm256_log_pd(q); - q = simde_mm256_mul_pd(q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.0))); - q = simde_mm256_sqrt_pd(q); - - /* el: q = r * r */ - q = simde_x_mm256_select_pd(q, simde_mm256_mul_pd(r, r), mask_el); - - /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0); */ - /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */ - /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ - simde__m256d numerator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el); - numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el)); - numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el)); - numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el)); - numerator = simde_mm256_fmadd_pd(numerator, q, 
simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el)); - numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el)); - { - simde__m256d multiplier; - multiplier = simde_mm256_and_pd(mask_lo, simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0))); - multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_hi, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.0)))); - multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_el, r)); - numerator = simde_mm256_mul_pd(numerator, multiplier); - } - - /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ - /* el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ - simde__m256d denominator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el); - denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el)); - denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el)); - denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el)); - denominator = simde_mm256_fmadd_pd(denominator, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el), - simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el)); - denominator = simde_mm256_fmadd_pd(denominator, q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0))); - - /* res = numerator / denominator; */ - simde__m256d res = simde_mm256_div_pd(numerator, denominator); - - retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); - } - - return retval; - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_cdfnorminv_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cdfnorminv_pd - #define _mm256_cdfnorminv_pd(a) simde_mm256_cdfnorminv_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_cdfnorminv_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cdfnorminv_ps(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_cdfnorminv_ps(a_.m256[i]); - } - - return simde__m512_from_private(r_); - #else - - simde__m512 retval = simde_mm512_setzero_ps(); - simde__mmask16 matched; - - { /* if (a < 0 
|| a > 1) */ - matched = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ); - matched |= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ); - - /* We don't actually need to do anything here since we initialize - * retval to 0.0. */ - } - - { /* else if (a == 0) */ - simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); - matched |= mask; - - retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF)); - } - - { /* else if (a == 1) */ - simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); - matched |= mask; - - retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(SIMDE_MATH_INFINITYF)); - } - - { /* else if (a < 0.02425) */ - simde__mmask16 mask_lo = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ); - /* else if (a > 0.97575) */ - simde__mmask16 mask_hi = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ); - - simde__mmask16 mask = mask_lo | mask_hi; - matched = matched | mask; - - /* else */ - simde__mmask16 mask_el = ~matched; - - /* r = a - 0.5f */ - simde__m512 r = simde_mm512_sub_ps(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5))); - - /* lo: q = a - * hi: q = (1.0 - a) */ - simde__m512 q = simde_mm512_maskz_mov_ps(mask_lo, a); - q = simde_mm512_mask_sub_ps(q, mask_hi, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a); - - /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ - q = simde_mm512_log_ps(q); - q = simde_mm512_mul_ps(q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.0))); - q = simde_mm512_sqrt_ps(q); - - /* el: q = r * r */ - q = simde_mm512_mask_mul_ps(q, mask_el, r, r); - - /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ - /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ - /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ - simde__m512 numerator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01))); - numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)))); - numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)))); - numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)))); - numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)))); - numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)))); - { - simde__m512 multiplier; - multiplier = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0)); - multiplier = simde_mm512_mask_mov_ps(multiplier, mask_hi, 
simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.0))); - multiplier = simde_mm512_mask_mov_ps(multiplier, mask_el, r); - numerator = simde_mm512_mul_ps(numerator, multiplier); - } - - /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ - /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ - simde__m512 denominator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01))); - denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)))); - denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)))); - denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)))); - denominator = simde_mm512_fmadd_ps(denominator, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0)), mask_el, q), - simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.0)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)))); - denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0))); - - /* res = numerator / denominator; */ - retval = simde_mm512_mask_div_ps(retval, mask_lo | mask_hi | mask_el, numerator, denominator); - } - - return retval; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cdfnorminv_ps - #define _mm512_cdfnorminv_ps(a) simde_mm512_cdfnorminv_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_cdfnorminv_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cdfnorminv_pd(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_cdfnorminv_pd(a_.m256d[i]); - } - - return simde__m512d_from_private(r_); - #else - - simde__m512d retval = simde_mm512_setzero_pd(); - simde__mmask8 matched; - - { /* if (a < 0 || a > 1) */ - matched = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ); - matched |= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ); - - /* We don't actually need to do anything here since we initialize - * retval to 0.0. 
- [long run of deleted vendored SIMDE SVML code omitted: this hunk removes the bundled simde_mm_/simde_mm256_/simde_mm512_ wrappers for cdfnorminv, erfinv, erfcinv, logb, log2, log1p, log10, and nearbyint (the _ps/_pd versions, their _mask variants, and the matching SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES #undef/#define blocks), as part of dropping the vendored simde headers from the package]
_mm512_nearbyint_pd(a) simde_mm512_nearbyint_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_nearbyint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_nearbyint_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_nearbyint_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_nearbyint_ps - #define _mm512_mask_nearbyint_ps(src, k, a) simde_mm512_mask_nearbyint_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_nearbyint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_nearbyint_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_nearbyint_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_nearbyint_pd - #define _mm512_mask_nearbyint_pd(src, k, a) simde_mm512_mask_nearbyint_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_pow_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_pow_ps(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_powf4_u10(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_pow_ps - #define _mm_pow_ps(a, b) simde_mm_pow_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_pow_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_pow_pd(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_powd2_u10(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_pow_pd - #define _mm_pow_pd(a, b) simde_mm_pow_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_pow_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_pow_ps(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_powf8_u10(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_pow_ps - #define _mm256_pow_ps(a, b) simde_mm256_pow_ps(a, b) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_pow_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_pow_pd(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && 
defined(SIMDE_X86_AVX_NATIVE) - return Sleef_powd4_u10(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]); - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_pow_pd - #define _mm256_pow_pd(a, b) simde_mm256_pow_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_pow_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_pow_ps(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_powf16_u10(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]); - } - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_pow_ps - #define _mm512_pow_ps(a, b) simde_mm512_pow_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_pow_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_pow_pd(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_powd8_u10(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]); - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_pow_pd - #define _mm512_pow_pd(a, b) simde_mm512_pow_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_pow_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_pow_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_pow_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_pow_ps - #define _mm512_mask_pow_ps(src, k, a, b) simde_mm512_mask_pow_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_pow_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_pow_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_pow_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_pow_pd - #define _mm512_mask_pow_pd(src, k, a, b) simde_mm512_mask_pow_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_clog_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_clog_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - simde__m128_private pow_res_ = simde__m128_to_private(simde_mm_pow_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)))); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = 
simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1])); - r_.f32[i + 1] = simde_math_atan2f(a_.f32[i + 1], a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_clog_ps - #define _mm_clog_ps(a) simde_mm_clog_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_clog_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_clog_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - simde__m256_private pow_res_ = simde__m256_to_private(simde_mm256_pow_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)))); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i + 1])); - r_.f32[i + 1] = simde_math_atan2f(a_.f32[i + 1], a_.f32[i]); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_clog_ps - #define _mm256_clog_ps(a) simde_mm256_clog_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_csqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_csqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - simde__m128 pow_res= simde_mm_pow_ps(a,simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))); - simde__m128_private pow_res_=simde__m128_to_private(pow_res); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { - simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]); - simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]); - - r_.f32[ i ] = simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); - r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_csqrt_ps - #define _mm_csqrt_ps(a) simde_mm_csqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_csqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_csqrt_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - simde__m256 pow_res= simde_mm256_pow_ps(a,simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0))); - simde__m256_private pow_res_=simde__m256_to_private(pow_res); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { - simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]); - simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]); - - r_.f32[ i ] = simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); - r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_csqrt_ps - #define _mm256_csqrt_ps(a) simde_mm256_csqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_rem_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_rem_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i8 = a_.i8 % b_.i8; - 
#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] % b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_rem_epi8 - #define _mm_rem_epi8(a, b) simde_mm_rem_epi8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_rem_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_rem_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i16 = a_.i16 % b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] % b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_rem_epi16 - #define _mm_rem_epi16(a, b) simde_mm_rem_epi16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_rem_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_rem_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i32 = a_.i32 % b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] % b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b) -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_rem_epi32 - #define _mm_rem_epi32(a, b) simde_mm_rem_epi32(a, b) - #undef _mm_irem_epi32 - #define _mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_rem_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_rem_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i64 = a_.i64 % b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_rem_epi64 - #define _mm_rem_epi64(a, b) simde_mm_rem_epi64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_rem_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_rem_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u8 = a_.u8 % b_.u8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a_.u8[i] % b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_rem_epu8 - #define _mm_rem_epu8(a, b) simde_mm_rem_epu8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_rem_epu16 (simde__m128i a, 
simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_rem_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u16 = a_.u16 % b_.u16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] % b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_rem_epu16 - #define _mm_rem_epu16(a, b) simde_mm_rem_epu16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_rem_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_rem_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u32 = a_.u32 % b_.u32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] % b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b) -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_rem_epu32 - #define _mm_rem_epu32(a, b) simde_mm_rem_epu32(a, b) - #undef _mm_urem_epi32 - #define _mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_rem_epu64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_rem_epu64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u64 = a_.u64 % b_.u64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] % b_.u64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_rem_epu64 - #define _mm_rem_epu64(a, b) simde_mm_rem_epu64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_rem_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rem_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i8 = a_.i8 % b_.i8; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_rem_epi8(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] % b_.i8[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_rem_epi8 - #define _mm256_rem_epi8(a, b) simde_mm256_rem_epi8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_rem_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rem_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = 
simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i16 = a_.i16 % b_.i16; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_rem_epi16(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] % b_.i16[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_rem_epi16 - #define _mm256_rem_epi16(a, b) simde_mm256_rem_epi16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_rem_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rem_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i32 = a_.i32 % b_.i32; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_rem_epi32(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] % b_.i32[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#define simde_mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b) -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_rem_epi32 - #define _mm256_rem_epi32(a, b) simde_mm256_rem_epi32(a, b) - #undef _mm256_irem_epi32 - #define _mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_rem_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rem_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i64 = a_.i64 % b_.i64; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_rem_epi64(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_rem_epi64 - #define _mm256_rem_epi64(a, b) simde_mm256_rem_epi64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_rem_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rem_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u8 = a_.u8 % b_.u8; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_rem_epu8(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a_.u8[i] % b_.u8[i]; - } - #endif - #endif - - return 
simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_rem_epu8 - #define _mm256_rem_epu8(a, b) simde_mm256_rem_epu8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_rem_epu16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rem_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u16 = a_.u16 % b_.u16; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_rem_epu16(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] % b_.u16[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_rem_epu16 - #define _mm256_rem_epu16(a, b) simde_mm256_rem_epu16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_rem_epu32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rem_epu32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u32 = a_.u32 % b_.u32; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_rem_epu32(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] % b_.u32[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#define simde_mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b) -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_rem_epu32 - #define _mm256_rem_epu32(a, b) simde_mm256_rem_epu32(a, b) - #undef _mm256_urem_epi32 - #define _mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_rem_epu64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rem_epu64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u64 = a_.u64 % b_.u64; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_rem_epu64(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] % b_.u64[i]; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_rem_epu64 - #define _mm256_rem_epu64(a, b) simde_mm256_rem_epu64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); 
- - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i8 = a_.i8 % b_.i8; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epi8(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] % b_.i8[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epi8 - #define _mm512_rem_epi8(a, b) simde_mm512_rem_epi8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i16 = a_.i16 % b_.i16; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epi16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] % b_.i16[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epi16 - #define _mm512_rem_epi16(a, b) simde_mm512_rem_epi16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i32 = a_.i32 % b_.i32; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epi32(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] % b_.i32[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epi32 - #define _mm512_rem_epi32(a, b) simde_mm512_rem_epi32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_rem_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_rem_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_rem_epi32 - #define _mm512_mask_rem_epi32(src, k, a, b) simde_mm512_mask_rem_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i64 = a_.i64 % b_.i64; - #else 
- #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epi64(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epi64 - #define _mm512_rem_epi64(a, b) simde_mm512_rem_epi64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epu8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epu8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u8 = a_.u8 % b_.u8; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epu8(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a_.u8[i] % b_.u8[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epu8 - #define _mm512_rem_epu8(a, b) simde_mm512_rem_epu8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epu16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epu16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u16 = a_.u16 % b_.u16; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epu16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] % b_.u16[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epu16 - #define _mm512_rem_epu16(a, b) simde_mm512_rem_epu16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epu32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epu32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u32 = a_.u32 % b_.u32; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epu32(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] % b_.u32[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epu32 - #define _mm512_rem_epu32(a, b) simde_mm512_rem_epu32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_rem_epu32(simde__m512i src, 
simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_rem_epu32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_rem_epu32 - #define _mm512_mask_rem_epu32(src, k, a, b) simde_mm512_mask_rem_epu32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epu64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epu64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u64 = a_.u64 % b_.u64; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epu64(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] % b_.u64[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epu64 - #define _mm512_rem_epu64(a, b) simde_mm512_rem_epu64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_recip_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_recip_ps(a); - #else - return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_recip_ps - #define _mm512_recip_ps(a) simde_mm512_recip_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_recip_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_recip_pd(a); - #else - return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_recip_pd - #define _mm512_recip_pd(a) simde_mm512_recip_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_recip_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_recip_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_recip_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_recip_ps - #define _mm512_mask_recip_ps(src, k, a) simde_mm512_mask_recip_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_recip_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_recip_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_recip_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_recip_pd - #define _mm512_mask_recip_pd(src, k, a) simde_mm512_mask_recip_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_rint_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rint_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_rintf16(a); - #else - 
simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_rintf(a_.f32[i]); - } - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rint_ps - #define _mm512_rint_ps(a) simde_mm512_rint_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_rint_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rint_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_rintd8(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_rint(a_.f64[i]); - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rint_pd - #define _mm512_rint_pd(a) simde_mm512_rint_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_rint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_rint_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_rint_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_rint_ps - #define _mm512_mask_rint_ps(src, k, a) simde_mm512_mask_rint_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_rint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_rint_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_rint_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_rint_pd - #define _mm512_mask_rint_pd(src, k, a) simde_mm512_mask_rint_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sin_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_sin_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sinf4_u10(a); - #else - return Sleef_sinf4_u35(a); - #endif - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sinf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_sin_ps - #define _mm_sin_ps(a) simde_mm_sin_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sin_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_sin_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sind2_u10(a); - #else - return Sleef_sind2_u35(a); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sin(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_sin_pd - #define _mm_sin_pd(a) simde_mm_sin_pd(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sin_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sin_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sinf8_u10(a); - #else - return Sleef_sinf8_u35(a); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_sin_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sinf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_sin_ps - #define _mm256_sin_ps(a) simde_mm256_sin_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sin_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sin_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sind4_u10(a); - #else - return Sleef_sind4_u35(a); - #endif - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_sin_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sin(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_sin_pd - #define _mm256_sin_pd(a) simde_mm256_sin_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_sin_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sin_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sinf16_u10(a); - #else - return Sleef_sinf16_u35(a); - #endif - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_sin_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sinf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_sin_ps - #define _mm512_sin_ps(a) simde_mm512_sin_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_sin_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sin_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sind8_u10(a); - #else - return Sleef_sind8_u35(a); - #endif - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_sin_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = 
simde_math_sin(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_sin_pd - #define _mm512_sin_pd(a) simde_mm512_sin_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_sin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sin_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_sin_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sin_ps - #define _mm512_mask_sin_ps(src, k, a) simde_mm512_mask_sin_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_sin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sin_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_sin_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sin_pd - #define _mm512_mask_sin_pd(src, k, a) simde_mm512_mask_sin_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sincos_ps (simde__m128* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_sincos_ps(HEDLEY_REINTERPRET_CAST(__m128*, mem_addr), a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - Sleef___m128_2 temp; - - #if SIMDE_ACCURACY_PREFERENCE > 1 - temp = Sleef_sincosf4_u10(a); - #else - temp = Sleef_sincosf4_u35(a); - #endif - - *mem_addr = temp.y; - return temp.x; - #else - simde__m128 r; - - r = simde_mm_sin_ps(a); - *mem_addr = simde_mm_cos_ps(a); - - return r; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_sincos_ps - #define _mm_sincos_ps(mem_addr, a) simde_mm_sincos_ps((mem_addr),(a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sincos_pd (simde__m128d* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_sincos_pd(HEDLEY_REINTERPRET_CAST(__m128d*, mem_addr), a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - Sleef___m128d_2 temp; - - #if SIMDE_ACCURACY_PREFERENCE > 1 - temp = Sleef_sincosd2_u10(a); - #else - temp = Sleef_sincosd2_u35(a); - #endif - - *mem_addr = temp.y; - return temp.x; - #else - simde__m128d r; - - r = simde_mm_sin_pd(a); - *mem_addr = simde_mm_cos_pd(a); - - return r; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_sincos_pd - #define _mm_sincos_pd(mem_addr, a) simde_mm_sincos_pd((mem_addr),(a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sincos_ps (simde__m256* mem_addr, simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sincos_ps(HEDLEY_REINTERPRET_CAST(__m256*, mem_addr), a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - Sleef___m256_2 temp; - - #if SIMDE_ACCURACY_PREFERENCE > 1 - temp = Sleef_sincosf8_u10(a); - #else - temp = Sleef_sincosf8_u35(a); - #endif - - *mem_addr = temp.y; - return temp.x; - #else - simde__m256 r; - - r = simde_mm256_sin_ps(a); - *mem_addr = simde_mm256_cos_ps(a); - - return r; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_sincos_ps - #define _mm256_sincos_ps(mem_addr, a) simde_mm256_sincos_ps((mem_addr),(a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-  [ remainder of vendored simde/x86/svml.h removed: portable wrappers for the
-    SVML intrinsics _mm{,256,512}_sincos_{ps,pd}, sind, sinh, svml_ceil,
-    svml_floor, svml_round, svml_sqrt, tan, tand, tanh, trunc and
-    _mm{,256}_udivrem_epi32, together with their _mask_ variants; each wrapper
-    calls the native SVML intrinsic when available, Sleef when
-    SIMDE_MATH_SLEEF_ENABLE is defined, and otherwise a scalar libm loop over
-    the vector lanes ]
-/* :: End simde/x86/svml.h :: */
-  [ vendored simde/x86/avx512/scalef.h removed: simde_mm{,256,512}_scalef_{ps,pd}
-    and the _ss/_sd scalar forms with _mask_/_maskz_ variants; the portable
-    path computes a * exp2(floor(b)) with subnormal inputs flushed to zero ]
-/* :: End simde/x86/avx512/scalef.h :: */
-/* :: Begin simde/x86/avx512/set4.h :: */
-  [ MIT license header ]
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_SET4_H) -#define SIMDE_X86_AVX512_SET4_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) { - simde__m512i_private r_; - - r_.i32[ 0] = a; - r_.i32[ 1] = b; - r_.i32[ 2] = c; - r_.i32[ 3] = d; - r_.i32[ 4] = a; - r_.i32[ 5] = b; - r_.i32[ 6] = c; - r_.i32[ 7] = d; - r_.i32[ 8] = a; - r_.i32[ 9] = b; - r_.i32[10] = c; - r_.i32[11] = d; - r_.i32[12] = a; - r_.i32[13] = b; - r_.i32[14] = c; - r_.i32[15] = d; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set4_epi32 - #define _mm512_set4_epi32(d,c,b,a) simde_mm512_set4_epi32(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) { - simde__m512i_private r_; - - r_.i64[0] = a; - r_.i64[1] = b; - r_.i64[2] = c; - r_.i64[3] = d; - r_.i64[4] = a; - r_.i64[5] = b; - r_.i64[6] = c; - r_.i64[7] = d; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set4_epi64 - #define _mm512_set4_epi64(d,c,b,a) simde_mm512_set4_epi64(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_set4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) { - simde__m512_private r_; - - r_.f32[ 0] = a; - r_.f32[ 1] = b; - r_.f32[ 2] = c; - r_.f32[ 3] = d; - r_.f32[ 4] = a; - r_.f32[ 5] = b; - r_.f32[ 6] = c; - r_.f32[ 7] = d; - r_.f32[ 8] = a; - r_.f32[ 9] = b; - r_.f32[10] = c; - r_.f32[11] = d; - r_.f32[12] = a; - r_.f32[13] = b; - r_.f32[14] = c; - r_.f32[15] = d; - - return simde__m512_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set4_ps - #define _mm512_set4_ps(d,c,b,a) simde_mm512_set4_ps(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_set4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) { - simde__m512d_private r_; - - r_.f64[0] = a; - r_.f64[1] = b; - r_.f64[2] = c; - r_.f64[3] = d; - r_.f64[4] = a; - r_.f64[5] = b; - r_.f64[6] = c; - r_.f64[7] = d; - - return simde__m512d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set4_pd - #define _mm512_set4_pd(d,c,b,a) simde_mm512_set4_pd(d,c,b,a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SET4_H) */ -/* :: End simde/x86/avx512/set4.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/setr.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_SETR_H) -#define SIMDE_X86_AVX512_SETR_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_setr_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8, - int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - simde__m512i_private r_; - - r_.i32[ 0] = e15; - r_.i32[ 1] = e14; - r_.i32[ 2] = e13; - r_.i32[ 3] = e12; - r_.i32[ 4] = e11; - r_.i32[ 5] = e10; - r_.i32[ 6] = e9; - r_.i32[ 7] = e8; - r_.i32[ 8] = e7; - r_.i32[ 9] = e6; - r_.i32[10] = e5; - r_.i32[11] = e4; - r_.i32[12] = e3; - r_.i32[13] = e2; - r_.i32[14] = e1; - r_.i32[15] = e0; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr_epi32 - #define _mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_setr_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - simde__m512i_private r_; - - r_.i64[0] = e7; - r_.i64[1] = e6; - r_.i64[2] = e5; - r_.i64[3] = e4; - r_.i64[4] = e3; - r_.i64[5] = e2; - r_.i64[6] = e1; - r_.i64[7] = e0; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr_epi64 - #define _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_setr_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12, - simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8, - simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - simde__m512_private r_; - - r_.f32[ 0] = e15; - r_.f32[ 1] = e14; - r_.f32[ 2] = e13; - r_.f32[ 3] = e12; - r_.f32[ 4] = e11; - r_.f32[ 5] = e10; - r_.f32[ 6] = e9; - r_.f32[ 7] = e8; - r_.f32[ 8] = e7; - r_.f32[ 9] = e6; - r_.f32[10] = e5; - r_.f32[11] = e4; - r_.f32[12] = e3; - r_.f32[13] = e2; - r_.f32[14] = e1; - r_.f32[15] = e0; - - return simde__m512_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr_ps - #define _mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_setr_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 
e1, simde_float64 e0) { - simde__m512d_private r_; - - r_.f64[0] = e7; - r_.f64[1] = e6; - r_.f64[2] = e5; - r_.f64[3] = e4; - r_.f64[4] = e3; - r_.f64[5] = e2; - r_.f64[6] = e1; - r_.f64[7] = e0; - - return simde__m512d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr_pd - #define _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SETR_H) */ -/* :: End simde/x86/avx512/setr.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/setr4.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_SETR4_H) -#define SIMDE_X86_AVX512_SETR4_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_setr4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) { - simde__m512i_private r_; - - r_.i32[ 0] = d; - r_.i32[ 1] = c; - r_.i32[ 2] = b; - r_.i32[ 3] = a; - r_.i32[ 4] = d; - r_.i32[ 5] = c; - r_.i32[ 6] = b; - r_.i32[ 7] = a; - r_.i32[ 8] = d; - r_.i32[ 9] = c; - r_.i32[10] = b; - r_.i32[11] = a; - r_.i32[12] = d; - r_.i32[13] = c; - r_.i32[14] = b; - r_.i32[15] = a; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr4_epi32 - #define _mm512_setr4_epi32(d,c,b,a) simde_mm512_setr4_epi32(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_setr4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) { - simde__m512i_private r_; - - r_.i64[0] = d; - r_.i64[1] = c; - r_.i64[2] = b; - r_.i64[3] = a; - r_.i64[4] = d; - r_.i64[5] = c; - r_.i64[6] = b; - r_.i64[7] = a; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr4_epi64 - #define _mm512_setr4_epi64(d,c,b,a) simde_mm512_setr4_epi64(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_setr4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) { - simde__m512_private r_; - - r_.f32[ 0] = d; - r_.f32[ 1] = c; - r_.f32[ 2] = b; - r_.f32[ 3] = a; - r_.f32[ 4] = d; - r_.f32[ 5] = c; - r_.f32[ 6] = b; - r_.f32[ 7] = a; - r_.f32[ 8] = d; - r_.f32[ 9] = c; - r_.f32[10] = b; - r_.f32[11] = a; - r_.f32[12] = d; - r_.f32[13] = c; - r_.f32[14] = b; - r_.f32[15] = a; - - return simde__m512_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr4_ps - #define _mm512_setr4_ps(d,c,b,a) simde_mm512_setr4_ps(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_setr4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) { - simde__m512d_private r_; - - r_.f64[0] = d; - r_.f64[1] = c; - r_.f64[2] = b; - r_.f64[3] = a; - r_.f64[4] = d; - r_.f64[5] = c; - r_.f64[6] = b; - r_.f64[7] = a; - - return simde__m512d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr4_pd - #define _mm512_setr4_pd(d,c,b,a) simde_mm512_setr4_pd(d,c,b,a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SETR4_H) */ -/* :: End simde/x86/avx512/setr4.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/shldv.h :: */ -#if !defined(SIMDE_X86_AVX512_SHLDV_H) -#define SIMDE_X86_AVX512_SHLDV_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shldv_epi32(simde__m128i a, simde__m128i b, simde__m128i c) { - #if 
defined(SIMDE_X86_AVX512VBMI2_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_shldv_epi32(a, b, c); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - uint64x2_t - values_lo = vreinterpretq_u64_u32(vzip1q_u32(b_.neon_u32, a_.neon_u32)), - values_hi = vreinterpretq_u64_u32(vzip2q_u32(b_.neon_u32, a_.neon_u32)); - - int32x4_t count = vandq_s32(c_.neon_i32, vdupq_n_s32(31)); - - values_lo = vshlq_u64(values_lo, vmovl_s32(vget_low_s32(count))); - values_hi = vshlq_u64(values_hi, vmovl_high_s32(count)); - - r_.neon_u32 = - vuzp2q_u32( - vreinterpretq_u32_u64(values_lo), - vreinterpretq_u32_u64(values_hi) - ); - #elif defined(SIMDE_X86_AVX2_NATIVE) - simde__m256i - tmp1, - lo = - simde_mm256_castps_si256( - simde_mm256_unpacklo_ps( - simde_mm256_castsi256_ps(simde_mm256_castsi128_si256(b)), - simde_mm256_castsi256_ps(simde_mm256_castsi128_si256(a)) - ) - ), - hi = - simde_mm256_castps_si256( - simde_mm256_unpackhi_ps( - simde_mm256_castsi256_ps(simde_mm256_castsi128_si256(b)), - simde_mm256_castsi256_ps(simde_mm256_castsi128_si256(a)) - ) - ), - tmp2 = - simde_mm256_castpd_si256( - simde_mm256_permute2f128_pd( - simde_mm256_castsi256_pd(lo), - simde_mm256_castsi256_pd(hi), - 32 - ) - ); - - tmp2 = - simde_mm256_sllv_epi64( - tmp2, - simde_mm256_cvtepi32_epi64( - simde_mm_and_si128( - c, - simde_mm_set1_epi32(31) - ) - ) - ); - - tmp1 = - simde_mm256_castpd_si256( - simde_mm256_permute2f128_pd( - simde_mm256_castsi256_pd(tmp2), - simde_mm256_castsi256_pd(tmp2), - 1 - ) - ); - - r_ = - simde__m128i_to_private( - simde_mm256_castsi256_si128( - simde_mm256_castps_si256( - simde_mm256_shuffle_ps( - simde_mm256_castsi256_ps(tmp2), - simde_mm256_castsi256_ps(tmp1), - 221 - ) - ) - ) - ); - #elif defined(SIMDE_X86_SSE2_NATIVE) - simde__m128i_private - c_ = simde__m128i_to_private(c), - lo = simde__m128i_to_private(simde_mm_unpacklo_epi32(b, a)), - hi = simde__m128i_to_private(simde_mm_unpackhi_epi32(b, a)); - - size_t halfway = (sizeof(r_.u32) / sizeof(r_.u32[0]) / 2); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < halfway ; i++) { - lo.u64[i] <<= (c_.u32[i] & 31); - hi.u64[i] <<= (c_.u32[halfway + i] & 31); - } - - r_ = - simde__m128i_to_private( - simde_mm_castps_si128( - simde_mm_shuffle_ps( - simde_mm_castsi128_ps(simde__m128i_from_private(lo)), - simde_mm_castsi128_ps(simde__m128i_from_private(hi)), - 221) - ) - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - simde__m128i_private - c_ = simde__m128i_to_private(c); - simde__m256i_private - a_ = simde__m256i_to_private(simde_mm256_castsi128_si256(a)), - b_ = simde__m256i_to_private(simde_mm256_castsi128_si256(b)), - tmp1, - tmp2; - - tmp1.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp1.u64), SIMDE_SHUFFLE_VECTOR_(32, 32, b_.i32, a_.i32, 0, 8, 1, 9, 2, 10, 3, 11)); - SIMDE_CONVERT_VECTOR_(tmp2.u64, c_.u32); - - tmp1.u64 <<= (tmp2.u64 & 31); - - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, tmp1.m128i_private[0].i32, tmp1.m128i_private[1].i32, 1, 3, 5, 7); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (((HEDLEY_STATIC_CAST(uint64_t, a_.u32[i]) << 32) | 
b_.u32[i]) << (c_.u32[i] & 31)) >> 32); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_shldv_epi32 - #define _mm_shldv_epi32(a, b, c) simde_mm_shldv_epi32(a, b, c) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SHLDV_H) */ -/* :: End simde/x86/avx512/shldv.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/sll.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_SLL_H) -#define SIMDE_X86_AVX512_SLL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_sll_epi16 (simde__m512i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_sll_epi16(a, count); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_sll_epi16(a_.m256i[i], count); - } - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 15) - return simde_mm512_setzero_si512(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift)); - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_sll_epi16 - #define _mm512_sll_epi16(a, count) simde_mm512_sll_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_sll_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX51BW_NATIVE) - return _mm512_mask_sll_epi16(src, k, a, count); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_sll_epi16(a, count)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sll_epi16 - #define _mm512_mask_sll_epi16(src, k, a, count) simde_mm512_mask_sll_epi16(src, k, a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_sll_epi16 (simde__mmask32 k, simde__m512i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_sll_epi16(k, a, count); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_sll_epi16(a, count)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_sll_epi16 - #define _mm512_maskz_sll_epi16(k, a, count) simde_mm512_maskz_sll_epi16(k, a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_sll_epi32 (simde__m512i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sll_epi32(a, count); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_sll_epi32(a_.m256i[i], count); - } - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 31) - return simde_mm512_setzero_si512(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i 
< (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift)); - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_sll_epi32 - #define _mm512_sll_epi32(a, count) simde_mm512_sll_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_sll_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sll_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_sll_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sll_epi32 - #define _mm512_mask_sll_epi32(src, k, a, b) simde_mm512_mask_sll_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_sll_epi32(simde__mmask16 k, simde__m512i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_sll_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_sll_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_sll_epi32 - #define _mm512_maskz_sll_epi32(k, a, b) simde_mm512_maskz_sll_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_sll_epi64 (simde__m512i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sll_epi64(a, count); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_sll_epi64(a_.m256i[i], count); - } - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 63) - return simde_mm512_setzero_si512(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift)); - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_sll_epi64 - #define _mm512_sll_epi64(a, count) simde_mm512_sll_epi64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_sll_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sll_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_sll_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sll_epi64 - #define _mm512_mask_sll_epi64(src, k, a, b) simde_mm512_mask_sll_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_sll_epi64(simde__mmask8 k, simde__m512i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_sll_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_sll_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_sll_epi64 - #define _mm512_maskz_sll_epi64(k, a, b) simde_mm512_maskz_sll_epi64(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SLL_H) */ -/* :: End simde/x86/avx512/sll.h :: */ -/* 
AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/sra.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_SRA_H) -#define SIMDE_X86_AVX512_SRA_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_sra_epi16 (simde__m512i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_sra_epi16(a, count); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_sra_epi16(a_.m256i[i], count); - } - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_sra_epi16 - #define _mm512_sra_epi16(a, count) simde_mm512_sra_epi16(a, count) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SRA_H) */ -/* :: End simde/x86/avx512/sra.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/srai.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining 
a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_SRAI_H) -#define SIMDE_X86_AVX512_SRAI_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srai_epi16 (simde__m512i a, const int imm8) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) -# define simde_mm512_srai_epi16(a, imm8) _mm512_srai_epi16(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_srai_epi16 - #define _mm512_srai_epi16(a, imm8) simde_mm512_srai_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srai_epi32 (simde__m512i a, const unsigned int imm8) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int32_t, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> imm8; - } - #endif - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) -# define simde_mm512_srai_epi32(a, imm8) _mm512_srai_epi32(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_srai_epi32 - #define _mm512_srai_epi32(a, imm8) simde_mm512_srai_epi32(a, imm8) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SRAI_H) */ -/* :: End simde/x86/avx512/srai.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/srav.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without 
limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_SRAV_H) -#define SIMDE_X86_AVX512_SRAV_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srav_epi16 (simde__m512i a, simde__m512i count) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_srav_epi16(a, count); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - count_ = simde__m512i_to_private(count); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i16[i]); - if (shift > 16) shift = 15; - r_.i16[i] = a_.i16[i] >> shift; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_srav_epi16 - #define _mm512_srav_epi16(a, count) simde_mm512_srav_epi16(a, count) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SRAV_H) */ -/* :: End simde/x86/avx512/srav.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/srl.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_SRL_H) -#define SIMDE_X86_AVX512_SRL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srl_epi16 (simde__m512i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_srl_epi16(a, count); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_srl_epi16(a_.m256i[i], count); - } - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - if (HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]) > 15) - return simde_mm512_setzero_si512(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count_.i64[0]; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> count_.i64[0]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_srl_epi16 - #define _mm512_srl_epi16(a, count) simde_mm512_srl_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srl_epi32 (simde__m512i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_srl_epi32(a, count); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_srl_epi32(a_.m256i[i], count); - } - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - if (HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]) > 31) - return simde_mm512_setzero_si512(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count_.i64[0]; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> count_.i64[0]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_srl_epi32 - #define _mm512_srl_epi32(a, count) simde_mm512_srl_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_srl_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_srl_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_srl_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_srl_epi32 - #define _mm512_mask_srl_epi32(src, k, a, b) simde_mm512_mask_srl_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_srl_epi32(simde__mmask16 k, simde__m512i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_srl_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_srl_epi32(a, b)); - #endif -} -#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_srl_epi32 - #define _mm512_maskz_srl_epi32(k, a, b) simde_mm512_maskz_srl_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srl_epi64 (simde__m512i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_srl_epi64(a, count); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_srl_epi64(a_.m256i[i], count); - } - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - if (HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]) > 63) - return simde_mm512_setzero_si512(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> count_.i64[0]; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> count_.i64[0]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_srl_epi64 - #define _mm512_srl_epi64(a, count) simde_mm512_srl_epi64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_srl_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_srl_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_srl_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_srl_epi64 - #define _mm512_mask_srl_epi64(src, k, a, b) simde_mm512_mask_srl_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_srl_epi64(simde__mmask8 k, simde__m512i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_srl_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_srl_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_srl_epi64 - #define _mm512_maskz_srl_epi64(k, a, b) simde_mm512_maskz_srl_epi64(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SRL_H) */ -/* :: End simde/x86/avx512/srl.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/store.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_STORE_H) -#define SIMDE_X86_AVX512_STORE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_store_ps (void * mem_addr, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - _mm512_store_ps(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_store_ps - #define _mm512_store_ps(mem_addr, a) simde_mm512_store_ps(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_store_pd (void * mem_addr, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - _mm512_store_pd(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_store_pd - #define _mm512_store_pd(mem_addr, a) simde_mm512_store_pd(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_store_si512 (void * mem_addr, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - _mm512_store_si512(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i), &a, sizeof(a)); - #endif -} -#define simde_mm512_store_epi8(mem_addr, a) simde_mm512_store_si512(mem_addr, a) -#define simde_mm512_store_epi16(mem_addr, a) simde_mm512_store_si512(mem_addr, a) -#define simde_mm512_store_epi32(mem_addr, a) simde_mm512_store_si512(mem_addr, a) -#define simde_mm512_store_epi64(mem_addr, a) simde_mm512_store_si512(mem_addr, a) -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_store_epi8 - #undef _mm512_store_epi16 - #undef _mm512_store_epi32 - #undef _mm512_store_epi64 - #undef _mm512_store_si512 - #define _mm512_store_si512(mem_addr, a) simde_mm512_store_si512(mem_addr, a) - #define _mm512_store_epi8(mem_addr, a) simde_mm512_store_si512(mem_addr, a) - #define _mm512_store_epi16(mem_addr, a) simde_mm512_store_si512(mem_addr, a) - #define _mm512_store_epi32(mem_addr, a) simde_mm512_store_si512(mem_addr, a) - #define _mm512_store_epi64(mem_addr, a) simde_mm512_store_si512(mem_addr, a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_STORE_H) */ -/* :: End simde/x86/avx512/store.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/subs.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom 
the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_SUBS_H) -#define SIMDE_X86_AVX512_SUBS_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_subs_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_subs_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_subs_epi8(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_subs_epi8 - #define _mm512_subs_epi8(a, b) simde_mm512_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_subs_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_subs_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_subs_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_subs_epi8 - #define _mm512_mask_subs_epi8(src, k, a, b) simde_mm512_mask_subs_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_subs_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_subs_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_subs_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_subs_epi8 - #define _mm512_maskz_subs_epi8(k, a, b) simde_mm512_maskz_subs_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_subs_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_subs_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_subs_epi16(a_.m256i[i], b_.m256i[i]); - } - 
#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_subs_epi16 - #define _mm512_subs_epi16(a, b) simde_mm512_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_subs_epu8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_subs_epu8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_subs_epu8(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_subs_epu8 - #define _mm512_subs_epu8(a, b) simde_mm512_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_subs_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_subs_epu8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_subs_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_subs_epu8 - #define _mm512_mask_subs_epu8(src, k, a, b) simde_mm512_mask_subs_epu8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_subs_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_subs_epu8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_subs_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_subs_epu8 - #define _mm512_maskz_subs_epu8(k, a, b) simde_mm512_maskz_subs_epu8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_subs_epu16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_subs_epu16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_subs_epu16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_subs_epu16 - #define _mm512_subs_epu16(a, b) simde_mm512_subs_epu16(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SUBS_H) */ -/* :: End simde/x86/avx512/subs.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/ternarylogic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the 
Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Kunwar Maheep Singh - * 2021 Christopher Moore - */ - -/* The ternarylogic implementation is based on Wojciech Muła's work at - * https://github.com/WojciechMula/ternary-logic */ - -#if !defined(SIMDE_X86_AVX512_TERNARYLOGIC_H) -#define SIMDE_X86_AVX512_TERNARYLOGIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x00_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, a); - HEDLEY_STATIC_CAST(void, b); - HEDLEY_STATIC_CAST(void, c); - const uint_fast32_t c0 = 0; - return c0; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x01_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | c; - const uint_fast32_t t1 = a | t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x02_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | a; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = c & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x03_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, c); - const uint_fast32_t t0 = b | a; - const uint_fast32_t t1 = ~t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x04_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a | c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = b & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x05_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, b); - const uint_fast32_t t0 = c | a; - const uint_fast32_t t1 = ~t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x06_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = b ^ c; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x07_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & c; - const uint_fast32_t t1 = a | t0; - const 
uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x08_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 & b; - const uint_fast32_t t2 = t1 & c; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x09_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = a | t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x0a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, b); - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = c & t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x0b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = ~b; - const uint_fast32_t t2 = t1 | c; - const uint_fast32_t t3 = t0 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x0c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, c); - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = b & t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x0d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = ~c; - const uint_fast32_t t2 = t1 | b; - const uint_fast32_t t3 = t0 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x0e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = b | c; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x0f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, b); - HEDLEY_STATIC_CAST(void, c); - const uint_fast32_t t0 = ~a; - return t0; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x10_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = a & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x11_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, a); - const uint_fast32_t t0 = c | b; - const uint_fast32_t t1 = ~t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x12_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = a ^ c; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x13_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const uint_fast32_t t1 = b | t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x14_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = a ^ b; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x15_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & a; - const uint_fast32_t t1 = c | t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - 
-SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x16_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = a & t1; - const uint_fast32_t t3 = ~a; - const uint_fast32_t t4 = b ^ c; - const uint_fast32_t t5 = t3 & t4; - const uint_fast32_t t6 = t2 | t5; - return t6; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x17_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | c; - const uint_fast32_t t1 = b & c; - const uint_fast32_t t2 = (a & t0) | (~a & t1); - const uint_fast32_t t3 = ~t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x18_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ b; - const uint_fast32_t t1 = a ^ c; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x19_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = b & c; - const uint_fast32_t t2 = a & t1; - const uint_fast32_t t3 = t0 ^ t2; - const uint_fast32_t t4 = ~t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x1a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & b; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = a ^ c; - const uint_fast32_t t3 = t1 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x1b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const uint_fast32_t t1 = ~b; - const uint_fast32_t t2 = t1 | c; - const uint_fast32_t t3 = t0 ^ t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x1c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = a ^ b; - const uint_fast32_t t3 = t1 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x1d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & b; - const uint_fast32_t t1 = ~c; - const uint_fast32_t t2 = t1 | b; - const uint_fast32_t t3 = t0 ^ t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x1e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | c; - const uint_fast32_t t1 = a ^ t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x1f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | c; - const uint_fast32_t t1 = a & t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x20_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = t0 & a; - const uint_fast32_t t2 = t1 & c; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x21_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ c; - const uint_fast32_t t1 = b | t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x22_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, a); - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = c & t0; 
- return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x23_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = ~a; - const uint_fast32_t t2 = t1 | c; - const uint_fast32_t t3 = t0 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x24_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ b; - const uint_fast32_t t1 = b ^ c; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x25_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & b; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = ~c; - const uint_fast32_t t3 = a ^ t2; - const uint_fast32_t t4 = t1 & t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x26_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & b; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = b ^ c; - const uint_fast32_t t3 = t1 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x27_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & c; - const uint_fast32_t t1 = ~a; - const uint_fast32_t t2 = t1 | c; - const uint_fast32_t t3 = t0 ^ t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x28_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ a; - const uint_fast32_t t1 = c & t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x29_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 | c; - const uint_fast32_t t2 = ~a; - const uint_fast32_t t3 = b ^ c; - const uint_fast32_t t4 = t2 ^ t3; - const uint_fast32_t t5 = t1 & t4; - return t5; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x2a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & a; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = c & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x2b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & a; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = c & t1; - const uint_fast32_t t3 = ~c; - const uint_fast32_t t4 = b | a; - const uint_fast32_t t5 = ~t4; - const uint_fast32_t t6 = t3 & t5; - const uint_fast32_t t7 = t2 | t6; - return t7; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x2c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | c; - const uint_fast32_t t1 = a ^ b; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x2d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = b | t0; - const uint_fast32_t t2 = a ^ t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x2e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | c; - const uint_fast32_t t1 = a & b; - const uint_fast32_t t2 = t0 ^ t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x2f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = 
~a; - const uint_fast32_t t1 = ~b; - const uint_fast32_t t2 = t1 & c; - const uint_fast32_t t3 = t0 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x30_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, c); - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = a & t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x31_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = ~c; - const uint_fast32_t t2 = t1 | a; - const uint_fast32_t t3 = t0 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x32_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = a | c; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x33_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, a); - HEDLEY_STATIC_CAST(void, c); - const uint_fast32_t t0 = ~b; - return t0; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x34_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = a ^ b; - const uint_fast32_t t3 = t1 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x35_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & b; - const uint_fast32_t t1 = ~c; - const uint_fast32_t t2 = t1 | a; - const uint_fast32_t t3 = t0 ^ t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x36_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a | c; - const uint_fast32_t t1 = b ^ t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x37_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a | c; - const uint_fast32_t t1 = b & t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x38_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a | c; - const uint_fast32_t t1 = a ^ b; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x39_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = t0 | a; - const uint_fast32_t t2 = b ^ t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x3a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = a & t0; - const uint_fast32_t t2 = ~a; - const uint_fast32_t t3 = t2 & c; - const uint_fast32_t t4 = t1 | t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x3b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = ~a; - const uint_fast32_t t2 = t1 & c; - const uint_fast32_t t3 = t0 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x3c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, c); - const uint_fast32_t t0 = b ^ a; - return t0; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x3d_impl_(uint_fast32_t a, 
uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ b; - const uint_fast32_t t1 = a | c; - const uint_fast32_t t2 = ~t1; - const uint_fast32_t t3 = t0 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x3e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 & c; - const uint_fast32_t t2 = a ^ b; - const uint_fast32_t t3 = t1 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x3f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, c); - const uint_fast32_t t0 = b & a; - const uint_fast32_t t1 = ~t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x40_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = t0 & a; - const uint_fast32_t t2 = t1 & b; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x41_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ a; - const uint_fast32_t t1 = c | t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x42_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ c; - const uint_fast32_t t1 = b ^ c; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x43_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = ~b; - const uint_fast32_t t3 = a ^ t2; - const uint_fast32_t t4 = t1 & t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x44_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, a); - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = b & t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x45_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = ~a; - const uint_fast32_t t2 = t1 | b; - const uint_fast32_t t3 = t0 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x46_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = b ^ c; - const uint_fast32_t t3 = t1 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x47_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & c; - const uint_fast32_t t1 = ~a; - const uint_fast32_t t2 = t1 | b; - const uint_fast32_t t3 = t0 ^ t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x48_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ c; - const uint_fast32_t t1 = b & t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x49_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 | b; - const uint_fast32_t t2 = ~a; - const uint_fast32_t t3 = b ^ c; - const uint_fast32_t t4 = t2 ^ t3; - const uint_fast32_t t5 = t1 & t4; - return t5; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x4a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - 
const uint_fast32_t t0 = b | c; - const uint_fast32_t t1 = a ^ c; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x4b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = t0 | c; - const uint_fast32_t t2 = a ^ t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x4c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = b & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x4d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = b & t1; - const uint_fast32_t t3 = ~b; - const uint_fast32_t t4 = a | c; - const uint_fast32_t t5 = ~t4; - const uint_fast32_t t6 = t3 & t5; - const uint_fast32_t t7 = t2 | t6; - return t7; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x4e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = c & t0; - const uint_fast32_t t2 = ~c; - const uint_fast32_t t3 = t2 & b; - const uint_fast32_t t4 = t1 | t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x4f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = ~c; - const uint_fast32_t t2 = b & t1; - const uint_fast32_t t3 = t0 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x50_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, b); - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = a & t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x51_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = ~b; - const uint_fast32_t t2 = t1 | a; - const uint_fast32_t t3 = t0 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x52_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = a ^ c; - const uint_fast32_t t3 = t1 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x53_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const uint_fast32_t t1 = ~b; - const uint_fast32_t t2 = t1 | a; - const uint_fast32_t t3 = t0 ^ t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x54_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = a | b; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x55_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, a); - HEDLEY_STATIC_CAST(void, b); - const uint_fast32_t t0 = ~c; - return t0; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x56_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | a; - const uint_fast32_t t1 = c ^ t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x57_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const 
uint_fast32_t t0 = b | a; - const uint_fast32_t t1 = c & t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x58_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a | b; - const uint_fast32_t t1 = a ^ c; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x59_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = t0 | a; - const uint_fast32_t t2 = c ^ t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x5a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, b); - const uint_fast32_t t0 = c ^ a; - return t0; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x5b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a | b; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = a ^ c; - const uint_fast32_t t3 = t1 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x5c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = a & t0; - const uint_fast32_t t2 = ~a; - const uint_fast32_t t3 = t2 & b; - const uint_fast32_t t4 = t1 | t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x5d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = ~a; - const uint_fast32_t t2 = t1 & b; - const uint_fast32_t t3 = t0 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x5e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = t0 & b; - const uint_fast32_t t2 = a ^ c; - const uint_fast32_t t3 = t1 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x5f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, b); - const uint_fast32_t t0 = c & a; - const uint_fast32_t t1 = ~t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x60_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = a & t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x61_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = t0 | a; - const uint_fast32_t t2 = ~b; - const uint_fast32_t t3 = a ^ c; - const uint_fast32_t t4 = t2 ^ t3; - const uint_fast32_t t5 = t1 & t4; - return t5; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x62_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a | c; - const uint_fast32_t t1 = b ^ c; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x63_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 | c; - const uint_fast32_t t2 = b ^ t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x64_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a | b; - const uint_fast32_t t1 = b ^ c; - const uint_fast32_t t2 = t0 & t1; - return t2; -} - 
-SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x65_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 | b; - const uint_fast32_t t2 = c ^ t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x66_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, a); - const uint_fast32_t t0 = c ^ b; - return t0; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x67_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = a | b; - const uint_fast32_t t2 = ~t1; - const uint_fast32_t t3 = t0 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x68_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = a & t0; - const uint_fast32_t t2 = ~a; - const uint_fast32_t t3 = b & c; - const uint_fast32_t t4 = t2 & t3; - const uint_fast32_t t5 = t1 | t4; - return t5; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x69_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = a ^ t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x6a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & a; - const uint_fast32_t t1 = c ^ t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x6b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 & c; - const uint_fast32_t c1 = ~HEDLEY_STATIC_CAST(uint_fast32_t, 0); - const uint_fast32_t t2 = a ^ c1; - const uint_fast32_t t3 = b ^ c; - const uint_fast32_t t4 = t2 ^ t3; - const uint_fast32_t t5 = t1 | t4; - return t5; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x6c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const uint_fast32_t t1 = b ^ t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x6d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 & b; - const uint_fast32_t c1 = ~HEDLEY_STATIC_CAST(uint_fast32_t, 0); - const uint_fast32_t t2 = a ^ c1; - const uint_fast32_t t3 = b ^ c; - const uint_fast32_t t4 = t2 ^ t3; - const uint_fast32_t t5 = t1 | t4; - return t5; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x6e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 & b; - const uint_fast32_t t2 = b ^ c; - const uint_fast32_t t3 = t1 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x6f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = b ^ c; - const uint_fast32_t t2 = t0 | t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x70_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = a & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x71_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | c; - const 
uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = b ^ c; - const uint_fast32_t t3 = a & t2; - const uint_fast32_t t4 = t1 | t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x72_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = c & t0; - const uint_fast32_t t2 = ~c; - const uint_fast32_t t3 = t2 & a; - const uint_fast32_t t4 = t1 | t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x73_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = ~c; - const uint_fast32_t t2 = a & t1; - const uint_fast32_t t3 = t0 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x74_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = b & t0; - const uint_fast32_t t2 = ~b; - const uint_fast32_t t3 = t2 & a; - const uint_fast32_t t4 = t1 | t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x75_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = ~b; - const uint_fast32_t t2 = a & t1; - const uint_fast32_t t3 = t0 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x76_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = t0 & a; - const uint_fast32_t t2 = b ^ c; - const uint_fast32_t t3 = t1 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x77_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, a); - const uint_fast32_t t0 = c & b; - const uint_fast32_t t1 = ~t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x78_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & c; - const uint_fast32_t t1 = a ^ t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x79_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = t0 & a; - const uint_fast32_t c1 = ~HEDLEY_STATIC_CAST(uint_fast32_t, 0); - const uint_fast32_t t2 = b ^ c1; - const uint_fast32_t t3 = a ^ c; - const uint_fast32_t t4 = t2 ^ t3; - const uint_fast32_t t5 = t1 | t4; - return t5; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x7a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = t0 & a; - const uint_fast32_t t2 = a ^ c; - const uint_fast32_t t3 = t1 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x7b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = a ^ c; - const uint_fast32_t t2 = t0 | t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x7c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = t0 & a; - const uint_fast32_t t2 = a ^ b; - const uint_fast32_t t3 = t1 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x7d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = a ^ b; - const uint_fast32_t t2 = t0 | t1; - return t2; -} - 
-SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x7e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ b; - const uint_fast32_t t1 = a ^ c; - const uint_fast32_t t2 = t0 | t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x7f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & b; - const uint_fast32_t t1 = t0 & c; - const uint_fast32_t c1 = ~HEDLEY_STATIC_CAST(uint_fast32_t, 0); - const uint_fast32_t t2 = t1 ^ c1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x80_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & c; - const uint_fast32_t t1 = a & t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x81_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = ~b; - const uint_fast32_t t3 = a ^ t2; - const uint_fast32_t t4 = t1 & t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x82_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ a; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = c & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x83_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ b; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = ~a; - const uint_fast32_t t3 = t2 | c; - const uint_fast32_t t4 = t1 & t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x84_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = b & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x85_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = ~c; - const uint_fast32_t t3 = t2 | b; - const uint_fast32_t t4 = t1 & t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x86_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | c; - const uint_fast32_t t1 = a ^ b; - const uint_fast32_t t2 = c ^ t1; - const uint_fast32_t t3 = t0 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x87_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & c; - const uint_fast32_t t1 = a ^ t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x88_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, a); - const uint_fast32_t t0 = c & b; - return t0; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x89_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = ~a; - const uint_fast32_t t3 = t2 | b; - const uint_fast32_t t4 = t1 & t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x8a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 | b; - const uint_fast32_t t2 = c & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES 
-uint_fast32_t -simde_x_ternarylogic_0x8b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 | b; - const uint_fast32_t t2 = ~b; - const uint_fast32_t t3 = t2 | c; - const uint_fast32_t t4 = t1 & t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x8c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 | c; - const uint_fast32_t t2 = b & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x8d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = t0 | b; - const uint_fast32_t t2 = ~a; - const uint_fast32_t t3 = t2 | c; - const uint_fast32_t t4 = t1 & t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x8e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & c; - const uint_fast32_t t1 = ~a; - const uint_fast32_t t2 = b ^ c; - const uint_fast32_t t3 = t1 & t2; - const uint_fast32_t t4 = t0 | t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x8f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = b & c; - const uint_fast32_t t2 = t0 | t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x90_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = a & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x91_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = ~b; - const uint_fast32_t t3 = t2 | a; - const uint_fast32_t t4 = t1 & t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x92_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a | c; - const uint_fast32_t t1 = a ^ b; - const uint_fast32_t t2 = c ^ t1; - const uint_fast32_t t3 = t0 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x93_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const uint_fast32_t t1 = b ^ t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x94_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a | b; - const uint_fast32_t t1 = a ^ c; - const uint_fast32_t t2 = b ^ t1; - const uint_fast32_t t3 = t0 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x95_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & a; - const uint_fast32_t t1 = c ^ t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x96_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = a ^ t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x97_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = t0 | a; - const uint_fast32_t t2 = t1 ^ a; - const uint_fast32_t t3 = b ^ c; - const uint_fast32_t t4 = a ^ t3; - 
const uint_fast32_t t5 = t2 | t4; - return t5; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x98_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = a | b; - const uint_fast32_t t3 = t1 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x99_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, a); - const uint_fast32_t t0 = c ^ b; - const uint_fast32_t t1 = ~t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x9a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = t0 & a; - const uint_fast32_t t2 = t1 ^ c; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x9b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = ~a; - const uint_fast32_t t3 = t2 & c; - const uint_fast32_t t4 = t1 | t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x9c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = t0 & a; - const uint_fast32_t t2 = t1 ^ b; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x9d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = ~a; - const uint_fast32_t t3 = t2 & b; - const uint_fast32_t t4 = t1 | t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x9e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & c; - const uint_fast32_t t1 = a ^ b; - const uint_fast32_t t2 = c ^ t1; - const uint_fast32_t t3 = t0 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0x9f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = a & t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xa0_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, b); - const uint_fast32_t t0 = c & a; - return t0; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xa1_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = ~b; - const uint_fast32_t t3 = t2 | a; - const uint_fast32_t t4 = t1 & t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xa2_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = a | t0; - const uint_fast32_t t2 = c & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xa3_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = t0 | a; - const uint_fast32_t t2 = ~a; - const uint_fast32_t t3 = t2 | c; - const uint_fast32_t t4 = t1 & t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xa4_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = a | b; - const 
uint_fast32_t t3 = t1 & t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xa5_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, b); - const uint_fast32_t t0 = c ^ a; - const uint_fast32_t t1 = ~t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xa6_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 & b; - const uint_fast32_t t2 = t1 ^ c; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xa7_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = ~b; - const uint_fast32_t t3 = t2 & c; - const uint_fast32_t t4 = t1 | t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xa8_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a | b; - const uint_fast32_t t1 = c & t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xa9_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | a; - const uint_fast32_t t1 = c ^ t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xaa_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, a); - HEDLEY_STATIC_CAST(void, b); - return c; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xab_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b | a; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = c | t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xac_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const uint_fast32_t t1 = ~a; - const uint_fast32_t t2 = t1 & b; - const uint_fast32_t t3 = t0 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xad_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = b & c; - const uint_fast32_t t3 = t1 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xae_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = t0 & b; - const uint_fast32_t t2 = t1 | c; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xaf_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, b); - const uint_fast32_t t0 = ~a; - const uint_fast32_t t1 = c | t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xb0_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = t0 | c; - const uint_fast32_t t2 = a & t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xb1_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = t0 | a; - const uint_fast32_t t2 = ~b; - const uint_fast32_t t3 = t2 | c; - const uint_fast32_t t4 = t1 & t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xb2_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const 
uint_fast32_t t1 = b & t0; - const uint_fast32_t t2 = ~b; - const uint_fast32_t t3 = a | c; - const uint_fast32_t t4 = t2 & t3; - const uint_fast32_t t5 = t1 | t4; - return t5; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xb3_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = a & c; - const uint_fast32_t t2 = t0 | t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xb4_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~c; - const uint_fast32_t t1 = t0 & b; - const uint_fast32_t t2 = t1 ^ a; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xb5_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = ~b; - const uint_fast32_t t3 = t2 & a; - const uint_fast32_t t4 = t1 | t3; - return t4; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xb6_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const uint_fast32_t t1 = a ^ b; - const uint_fast32_t t2 = c ^ t1; - const uint_fast32_t t3 = t0 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xb7_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ c; - const uint_fast32_t t1 = b & t0; - const uint_fast32_t t2 = ~t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xb8_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & c; - const uint_fast32_t t1 = ~b; - const uint_fast32_t t2 = t1 & a; - const uint_fast32_t t3 = t0 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xb9_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = a & c; - const uint_fast32_t t3 = t1 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xba_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = t0 & a; - const uint_fast32_t t2 = t1 | c; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xbb_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - HEDLEY_STATIC_CAST(void, a); - const uint_fast32_t t0 = ~b; - const uint_fast32_t t1 = c | t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xbc_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a & c; - const uint_fast32_t t1 = a ^ b; - const uint_fast32_t t2 = t0 | t1; - return t2; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xbd_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = a ^ c; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = a ^ b; - const uint_fast32_t t3 = t1 | t2; - return t3; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xbe_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b ^ a; - const uint_fast32_t t1 = c | t0; - return t1; -} - -SIMDE_FUNCTION_ATTRIBUTES -uint_fast32_t -simde_x_ternarylogic_0xbf_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { - const uint_fast32_t t0 = b & a; - const uint_fast32_t t1 = ~t0; - const uint_fast32_t t2 = c | t1; - 
[The remainder of this hunk deletes the rest of the vendored simde/x86/avx512/ternarylogic.h:
the remaining per-truth-table helper functions (simde_x_ternarylogic_0xc0_impl_ through
simde_x_ternarylogic_0xff_impl_), the SIMDE_X_TERNARYLOGIC_CASE / SIMDE_X_TERNARYLOGIC_SWITCH
macro table covering imm8 values 0x00-0xff, and the simde_mm_/simde_mm256_/simde_mm512_
ternarylogic_epi32 and ternarylogic_epi64 wrappers with their mask_/maskz_ variants,
SIMDE_TERNARYLOGIC_COMPRESSION fallback paths, and native-alias #defines.]

[The deletion then continues through the vendored simde/x86/avx512/testn.h (MIT license header,
"AUTOMATICALLY GENERATED FILE" markers, and simde_mm512_testn_epi64_mask) and into
simde/x86/avx512/unpacklo.h (simde_mm512_unpacklo_epi8/epi16/epi32 and their mask_/maskz_
variants at the 512-, 256-, and 128-bit widths); the hunk is truncated mid-macro at
_mm_maskz_unpacklo_epi32.]
a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_unpacklo_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpacklo_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.i64, b_.i64, 0, 8, 2, 10, 4, 12, 6, 14); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256i[0] = simde_mm256_unpacklo_epi64(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_unpacklo_epi64(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { - r_.i64[2 * i] = a_.i64[2 * i]; - r_.i64[2 * i + 1] = b_.i64[2 * i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpacklo_epi64 - #define _mm512_unpacklo_epi64(a, b) simde_mm512_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_unpacklo_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpacklo_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_unpacklo_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpacklo_epi64 - #define _mm512_mask_unpacklo_epi64(src, k, a, b) simde_mm512_mask_unpacklo_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_unpacklo_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpacklo_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_unpacklo_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpacklo_epi64 - #define _mm512_maskz_unpacklo_epi64(k, a, b) simde_mm512_maskz_unpacklo_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_unpacklo_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_unpacklo_epi64(src, k, a, b); - #else - return simde_mm256_mask_mov_epi64(src, k, simde_mm256_unpacklo_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_unpacklo_epi64 - #define _mm256_mask_unpacklo_epi64(src, k, a, b) simde_mm256_mask_unpacklo_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_unpacklo_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_unpacklo_epi64(k, a, b); - #else - return simde_mm256_maskz_mov_epi64(k, simde_mm256_unpacklo_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_unpacklo_epi64 - #define _mm256_maskz_unpacklo_epi64(k, a, b) simde_mm256_maskz_unpacklo_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_unpacklo_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_unpacklo_epi64(src, k, a, b); - #else - return 
simde_mm_mask_mov_epi64(src, k, simde_mm_unpacklo_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_unpacklo_epi64 - #define _mm_mask_unpacklo_epi64(src, k, a, b) simde_mm_mask_unpacklo_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_unpacklo_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_unpacklo_epi64(k, a, b); - #else - return simde_mm_maskz_mov_epi64(k, simde_mm_unpacklo_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_unpacklo_epi64 - #define _mm_maskz_unpacklo_epi64(k, a, b) simde_mm_maskz_unpacklo_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_unpacklo_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpacklo_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.f32, b_.f32, - 0, 16, 1, 17, 4, 20, 5, 21, - 8, 24, 9, 25, 12, 28, 13, 29); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256[0] = simde_mm256_unpacklo_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_unpacklo_ps(a_.m256[1], b_.m256[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0]) / 2) ; i++) { - r_.f32[2 * i] = a_.f32[i + ~(~i | 1)]; - r_.f32[2 * i + 1] = b_.f32[i + ~(~i | 1)]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpacklo_ps - #define _mm512_unpacklo_ps(a, b) simde_mm512_unpacklo_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_unpacklo_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpacklo_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_unpacklo_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpacklo_ps - #define _mm512_mask_unpacklo_ps(src, k, a, b) simde_mm512_mask_unpacklo_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_unpacklo_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpacklo_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_unpacklo_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpacklo_ps - #define _mm512_maskz_unpacklo_ps(k, a, b) simde_mm512_maskz_unpacklo_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_unpacklo_ps(simde__m256 src, simde__mmask8 k, simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_unpacklo_ps(src, k, a, b); - #else - return simde_mm256_mask_mov_ps(src, k, simde_mm256_unpacklo_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_unpacklo_ps - #define _mm256_mask_unpacklo_ps(src, k, a, b) simde_mm256_mask_unpacklo_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 
-simde_mm256_maskz_unpacklo_ps(simde__mmask8 k, simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_unpacklo_ps(k, a, b); - #else - return simde_mm256_maskz_mov_ps(k, simde_mm256_unpacklo_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_unpacklo_ps - #define _mm256_maskz_unpacklo_ps(k, a, b) simde_mm256_maskz_unpacklo_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_unpacklo_ps(simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_unpacklo_ps(src, k, a, b); - #else - return simde_mm_mask_mov_ps(src, k, simde_mm_unpacklo_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_unpacklo_ps - #define _mm_mask_unpacklo_ps(src, k, a, b) simde_mm_mask_unpacklo_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskz_unpacklo_ps(simde__mmask8 k, simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_unpacklo_ps(k, a, b); - #else - return simde_mm_maskz_mov_ps(k, simde_mm_unpacklo_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_unpacklo_ps - #define _mm_maskz_unpacklo_ps(k, a, b) simde_mm_maskz_unpacklo_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_unpacklo_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpacklo_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.f64, b_.f64, 0, 8, 2, 10, 4, 12, 6, 14); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256d[0] = simde_mm256_unpacklo_pd(a_.m256d[0], b_.m256d[0]); - r_.m256d[1] = simde_mm256_unpacklo_pd(a_.m256d[1], b_.m256d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0]) / 2) ; i++) { - r_.f64[2 * i] = a_.f64[2 * i]; - r_.f64[2 * i + 1] = b_.f64[2 * i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpacklo_pd - #define _mm512_unpacklo_pd(a, b) simde_mm512_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_unpacklo_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpacklo_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_unpacklo_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpacklo_pd - #define _mm512_mask_unpacklo_pd(src, k, a, b) simde_mm512_mask_unpacklo_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_unpacklo_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpacklo_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_unpacklo_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpacklo_pd - #define _mm512_maskz_unpacklo_pd(k, 
a, b) simde_mm512_maskz_unpacklo_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_unpacklo_pd(simde__m256d src, simde__mmask8 k, simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_unpacklo_pd(src, k, a, b); - #else - return simde_mm256_mask_mov_pd(src, k, simde_mm256_unpacklo_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_unpacklo_pd - #define _mm256_mask_unpacklo_pd(src, k, a, b) simde_mm256_mask_unpacklo_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskz_unpacklo_pd(simde__mmask8 k, simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_unpacklo_pd(k, a, b); - #else - return simde_mm256_maskz_mov_pd(k, simde_mm256_unpacklo_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_unpacklo_pd - #define _mm256_maskz_unpacklo_pd(k, a, b) simde_mm256_maskz_unpacklo_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mask_unpacklo_pd(simde__m128d src, simde__mmask8 k, simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_unpacklo_pd(src, k, a, b); - #else - return simde_mm_mask_mov_pd(src, k, simde_mm_unpacklo_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_unpacklo_pd - #define _mm_mask_unpacklo_pd(src, k, a, b) simde_mm_mask_unpacklo_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_maskz_unpacklo_pd(simde__mmask8 k, simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_unpacklo_pd(k, a, b); - #else - return simde_mm_maskz_mov_pd(k, simde_mm_unpacklo_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_unpacklo_pd - #define _mm_maskz_unpacklo_pd(k, a, b) simde_mm_maskz_unpacklo_pd(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_UNPACKLO_H) */ -/* :: End simde/x86/avx512/unpacklo.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/unpackhi.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_UNPACKHI_H) -#define SIMDE_X86_AVX512_UNPACKHI_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_unpackhi_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_unpackhi_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 64, a_.i8, b_.i8, - 8, 72, 9, 73, 10, 74, 11, 75, - 12, 76, 13, 77, 14, 78, 15, 79, - 24, 88, 25, 89, 26, 90, 27, 91, - 28, 92, 29, 93, 30, 94, 31, 95, - 40, 104, 41, 105, 42, 106, 43, 107, - 44, 108, 45, 109, 46, 110, 47, 111, - 56, 120, 57, 121, 58, 122, 59, 123, - 60, 124, 61, 125, 62, 126, 63, 127); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256i[0] = simde_mm256_unpackhi_epi8(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_unpackhi_epi8(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { - r_.i8[2 * i] = a_.i8[i + 8 + ~(~i | 7)]; - r_.i8[2 * i + 1] = b_.i8[i + 8 + ~(~i | 7)]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpackhi_epi8 - #define _mm512_unpackhi_epi8(a, b) simde_mm512_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_unpackhi_epi8(simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_unpackhi_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_unpackhi_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpackhi_epi8 - #define _mm512_mask_unpackhi_epi8(src, k, a, b) simde_mm512_mask_unpackhi_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_unpackhi_epi8(simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_unpackhi_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_unpackhi_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpackhi_epi8 - #define _mm512_maskz_unpackhi_epi8(k, a, b) simde_mm512_maskz_unpackhi_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_unpackhi_epi8(simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_unpackhi_epi8(src, k, a, b); - #else - return simde_mm256_mask_mov_epi8(src, k, simde_mm256_unpackhi_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_unpackhi_epi8 - #define _mm256_mask_unpackhi_epi8(src, k, a, b) simde_mm256_mask_unpackhi_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_unpackhi_epi8(simde__mmask32 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_unpackhi_epi8(k, a, b); - #else - return simde_mm256_maskz_mov_epi8(k, simde_mm256_unpackhi_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_unpackhi_epi8 - #define _mm256_maskz_unpackhi_epi8(k, a, b) simde_mm256_maskz_unpackhi_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_unpackhi_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_unpackhi_epi8(src, k, a, b); - #else - return simde_mm_mask_mov_epi8(src, k, simde_mm_unpackhi_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_unpackhi_epi8 - #define _mm_mask_unpackhi_epi8(src, k, a, b) simde_mm_mask_unpackhi_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_unpackhi_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_unpackhi_epi8(k, a, b); - #else - return simde_mm_maskz_mov_epi8(k, simde_mm_unpackhi_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_unpackhi_epi8 - #define _mm_maskz_unpackhi_epi8(k, a, b) simde_mm_maskz_unpackhi_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_unpackhi_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_unpackhi_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 64, a_.i16, b_.i16, - 4, 36, 5, 37, 6, 38, 7, 39, 12, 44, 13, 45, 14, 46, 15, 47, - 20, 52, 21, 53, 22, 54, 23, 55, 28, 60, 29, 61, 30, 62, 31, 63); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256i[0] = simde_mm256_unpackhi_epi16(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_unpackhi_epi16(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { - r_.i16[2 * i] = a_.i16[i + 4 + ~(~i | 3)]; - r_.i16[2 * i + 1] = b_.i16[i + 4 + ~(~i | 3)]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpackhi_epi16 - #define _mm512_unpackhi_epi16(a, b) simde_mm512_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_unpackhi_epi16(simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_unpackhi_epi16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_unpackhi_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpackhi_epi16 - #define _mm512_mask_unpackhi_epi16(src, k, a, b) 
simde_mm512_mask_unpackhi_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_unpackhi_epi16(simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_unpackhi_epi16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_unpackhi_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpackhi_epi16 - #define _mm512_maskz_unpackhi_epi16(k, a, b) simde_mm512_maskz_unpackhi_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_unpackhi_epi16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_unpackhi_epi16(src, k, a, b); - #else - return simde_mm256_mask_mov_epi16(src, k, simde_mm256_unpackhi_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_unpackhi_epi16 - #define _mm256_mask_unpackhi_epi16(src, k, a, b) simde_mm256_mask_unpackhi_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_unpackhi_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_unpackhi_epi16(k, a, b); - #else - return simde_mm256_maskz_mov_epi16(k, simde_mm256_unpackhi_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_unpackhi_epi16 - #define _mm256_maskz_unpackhi_epi16(k, a, b) simde_mm256_maskz_unpackhi_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_unpackhi_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_unpackhi_epi16(src, k, a, b); - #else - return simde_mm_mask_mov_epi16(src, k, simde_mm_unpackhi_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_unpackhi_epi16 - #define _mm_mask_unpackhi_epi16(src, k, a, b) simde_mm_mask_unpackhi_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_unpackhi_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_unpackhi_epi16(k, a, b); - #else - return simde_mm_maskz_mov_epi16(k, simde_mm_unpackhi_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_unpackhi_epi16 - #define _mm_maskz_unpackhi_epi16(k, a, b) simde_mm_maskz_unpackhi_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_unpackhi_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpackhi_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.i32, b_.i32, - 2, 18, 3 , 19, 6, 22, 7, 23, - 10, 26, 11, 27, 14, 30, 15, 31); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256i[0] = simde_mm256_unpackhi_epi32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = 
simde_mm256_unpackhi_epi32(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { - r_.i32[2 * i] = a_.i32[i + 2 + ~(~i | 1)]; - r_.i32[2 * i + 1] = b_.i32[i + 2 + ~(~i | 1)]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpackhi_epi32 - #define _mm512_unpackhi_epi32(a, b) simde_mm512_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_unpackhi_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpackhi_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_unpackhi_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpackhi_epi32 - #define _mm512_mask_unpackhi_epi32(src, k, a, b) simde_mm512_mask_unpackhi_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_unpackhi_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpackhi_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_unpackhi_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpackhi_epi32 - #define _mm512_maskz_unpackhi_epi32(k, a, b) simde_mm512_maskz_unpackhi_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_unpackhi_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_unpackhi_epi32(src, k, a, b); - #else - return simde_mm256_mask_mov_epi32(src, k, simde_mm256_unpackhi_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_unpackhi_epi32 - #define _mm256_mask_unpackhi_epi32(src, k, a, b) simde_mm256_mask_unpackhi_epi32(src, k, a, b) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_unpackhi_epi32(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_unpackhi_epi32(k, a, b); - #else - return simde_mm256_maskz_mov_epi32(k, simde_mm256_unpackhi_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_unpackhi_epi32 - #define _mm256_maskz_unpackhi_epi32(k, a, b) simde_mm256_maskz_unpackhi_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_unpackhi_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_unpackhi_epi32(src, k, a, b); - #else - return simde_mm_mask_mov_epi32(src, k, simde_mm_unpackhi_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_unpackhi_epi32 - #define _mm_mask_unpackhi_epi32(src, k, a, b) simde_mm_mask_unpackhi_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_unpackhi_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return 
_mm_maskz_unpackhi_epi32(k, a, b); - #else - return simde_mm_maskz_mov_epi32(k, simde_mm_unpackhi_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_unpackhi_epi32 - #define _mm_maskz_unpackhi_epi32(k, a, b) simde_mm_maskz_unpackhi_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_unpackhi_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpackhi_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.i64, b_.i64, 1, 9, 3, 11, 5, 13, 7, 15); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256i[0] = simde_mm256_unpackhi_epi64(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_unpackhi_epi64(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { - r_.i64[2 * i] = a_.i64[2 * i + 1]; - r_.i64[2 * i + 1] = b_.i64[2 * i + 1]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpackhi_epi64 - #define _mm512_unpackhi_epi64(a, b) simde_mm512_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_unpackhi_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpackhi_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_unpackhi_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpackhi_epi64 - #define _mm512_mask_unpackhi_epi64(src, k, a, b) simde_mm512_mask_unpackhi_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_unpackhi_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpackhi_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_unpackhi_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpackhi_epi64 - #define _mm512_maskz_unpackhi_epi64(k, a, b) simde_mm512_maskz_unpackhi_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_unpackhi_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_unpackhi_epi64(src, k, a, b); - #else - return simde_mm256_mask_mov_epi64(src, k, simde_mm256_unpackhi_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_unpackhi_epi64 - #define _mm256_mask_unpackhi_epi64(src, k, a, b) simde_mm256_mask_unpackhi_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_unpackhi_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_unpackhi_epi64(k, a, b); - #else - return simde_mm256_maskz_mov_epi64(k, simde_mm256_unpackhi_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_unpackhi_epi64 - #define _mm256_maskz_unpackhi_epi64(k, a, b) 
simde_mm256_maskz_unpackhi_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_unpackhi_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_unpackhi_epi64(src, k, a, b); - #else - return simde_mm_mask_mov_epi64(src, k, simde_mm_unpackhi_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_unpackhi_epi64 - #define _mm_mask_unpackhi_epi64(src, k, a, b) simde_mm_mask_unpackhi_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_unpackhi_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_unpackhi_epi64(k, a, b); - #else - return simde_mm_maskz_mov_epi64(k, simde_mm_unpackhi_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_unpackhi_epi64 - #define _mm_maskz_unpackhi_epi64(k, a, b) simde_mm_maskz_unpackhi_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_unpackhi_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpackhi_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.f32, b_.f32, - 2, 18, 3 , 19, 6, 22, 7, 23, - 10, 26, 11, 27, 14, 30, 15, 31); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256[0] = simde_mm256_unpackhi_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_unpackhi_ps(a_.m256[1], b_.m256[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0]) / 2) ; i++) { - r_.f32[2 * i] = a_.f32[i + 2 + ~(~i | 1)]; - r_.f32[2 * i + 1] = b_.f32[i + 2 + ~(~i | 1)]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpackhi_ps - #define _mm512_unpackhi_ps(a, b) simde_mm512_unpackhi_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_unpackhi_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpackhi_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_unpackhi_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpackhi_ps - #define _mm512_mask_unpackhi_ps(src, k, a, b) simde_mm512_mask_unpackhi_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_unpackhi_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpackhi_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_unpackhi_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpackhi_ps - #define _mm512_maskz_unpackhi_ps(k, a, b) simde_mm512_maskz_unpackhi_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_unpackhi_ps(simde__m256 src, simde__mmask8 k, simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_unpackhi_ps(src, k, a, b); - #else - return simde_mm256_mask_mov_ps(src, k, 
simde_mm256_unpackhi_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_unpackhi_ps - #define _mm256_mask_unpackhi_ps(src, k, a, b) simde_mm256_mask_unpackhi_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskz_unpackhi_ps(simde__mmask8 k, simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_unpackhi_ps(k, a, b); - #else - return simde_mm256_maskz_mov_ps(k, simde_mm256_unpackhi_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_unpackhi_ps - #define _mm256_maskz_unpackhi_ps(k, a, b) simde_mm256_maskz_unpackhi_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_unpackhi_ps(simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_unpackhi_ps(src, k, a, b); - #else - return simde_mm_mask_mov_ps(src, k, simde_mm_unpackhi_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_unpackhi_ps - #define _mm_mask_unpackhi_ps(src, k, a, b) simde_mm_mask_unpackhi_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskz_unpackhi_ps(simde__mmask8 k, simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_unpackhi_ps(k, a, b); - #else - return simde_mm_maskz_mov_ps(k, simde_mm_unpackhi_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_unpackhi_ps - #define _mm_maskz_unpackhi_ps(k, a, b) simde_mm_maskz_unpackhi_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_unpackhi_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpackhi_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.f64, b_.f64, 1, 9, 3, 11, 5, 13, 7, 15); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256d[0] = simde_mm256_unpackhi_pd(a_.m256d[0], b_.m256d[0]); - r_.m256d[1] = simde_mm256_unpackhi_pd(a_.m256d[1], b_.m256d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0]) / 2) ; i++) { - r_.f64[2 * i] = a_.f64[2 * i + 1]; - r_.f64[2 * i + 1] = b_.f64[2 * i + 1]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpackhi_pd - #define _mm512_unpackhi_pd(a, b) simde_mm512_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_unpackhi_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpackhi_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_unpackhi_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpackhi_pd - #define _mm512_mask_unpackhi_pd(src, k, a, b) simde_mm512_mask_unpackhi_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_unpackhi_pd(simde__mmask8 k, 
simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpackhi_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_unpackhi_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpackhi_pd - #define _mm512_maskz_unpackhi_pd(k, a, b) simde_mm512_maskz_unpackhi_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_unpackhi_pd(simde__m256d src, simde__mmask8 k, simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_unpackhi_pd(src, k, a, b); - #else - return simde_mm256_mask_mov_pd(src, k, simde_mm256_unpackhi_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_unpackhi_pd - #define _mm256_mask_unpackhi_pd(src, k, a, b) simde_mm256_mask_unpackhi_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskz_unpackhi_pd(simde__mmask8 k, simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_unpackhi_pd(k, a, b); - #else - return simde_mm256_maskz_mov_pd(k, simde_mm256_unpackhi_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_unpackhi_pd - #define _mm256_maskz_unpackhi_pd(k, a, b) simde_mm256_maskz_unpackhi_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mask_unpackhi_pd(simde__m128d src, simde__mmask8 k, simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_unpackhi_pd(src, k, a, b); - #else - return simde_mm_mask_mov_pd(src, k, simde_mm_unpackhi_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_unpackhi_pd - #define _mm_mask_unpackhi_pd(src, k, a, b) simde_mm_mask_unpackhi_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_maskz_unpackhi_pd(simde__mmask8 k, simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_unpackhi_pd(k, a, b); - #else - return simde_mm_maskz_mov_pd(k, simde_mm_unpackhi_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_unpackhi_pd - #define _mm_maskz_unpackhi_pd(k, a, b) simde_mm_maskz_unpackhi_pd(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_UNPACKHI_H) */ -/* :: End simde/x86/avx512/unpackhi.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#endif -/* :: End simde/x86/avx512.h :: */ diff --git a/src/simde/x86/clmul.h b/src/simde/x86/clmul.h deleted file mode 100644 index cd4199328..000000000 --- a/src/simde/x86/clmul.h +++ /dev/null @@ -1,36799 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/clmul.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files 
(the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2016 Thomas Pornin - */ - -/* The portable version is based on the implementation in BearSSL, - * which is MIT licensed, constant-time / branch-free, and documented - * at https://www.bearssl.org/constanttime.html (specifically, we use - * the implementation from ghash_ctmul64.c). */ - -#if !defined(SIMDE_X86_CLMUL_H) -#define SIMDE_X86_CLMUL_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/set.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_SET_H) -#define SIMDE_X86_AVX512_SET_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/types.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
[... remainder of the deleted vendored SIMDE/Hedley tree omitted: repeated MIT-license boilerplate and include guards for simde/x86/avx512 types, simde/x86/avx.h, simde/x86/sse.h, simde/x86/mmx.h, and simde/simde-common.h, followed by the bundled CC0-licensed simde/hedley.h (HEDLEY_VERSION 16) with its compiler-version checks (GCC, MSVC, Intel, PGI, SunPro, ARM, IBM, TI, Cray, IAR, TinyC, DMC, CompCert, Pelles, MCST LCC), feature/attribute/builtin detection macros, diagnostic push/pop and warning-suppression macros, and portability wrappers (HEDLEY_DEPRECATED, HEDLEY_NO_RETURN, HEDLEY_ASSUME/UNREACHABLE, HEDLEY_LIKELY/UNLIKELY, HEDLEY_MALLOC, HEDLEY_PURE, HEDLEY_CONST, HEDLEY_RESTRICT, HEDLEY_INLINE variants, visibility and printf-format attributes). All of these third-party headers are removed as part of dropping the SIMDE dependency. ...]
HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(HEDLEY_IS_CONSTANT) -# undef HEDLEY_IS_CONSTANT -#endif -#if defined(HEDLEY_REQUIRE_CONSTEXPR) -# undef HEDLEY_REQUIRE_CONSTEXPR -#endif -/* HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_IS_CONSTEXPR_) -# undef HEDLEY_IS_CONSTEXPR_ -#endif -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -# if \ - HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) -# endif -# elif \ - ( \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(HEDLEY_SUNPRO_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION)) || \ - (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) -# endif -# elif \ - defined(HEDLEY_GCC_VERSION) || \ - defined(HEDLEY_INTEL_VERSION) || \ - defined(HEDLEY_TINYC_VERSION) || \ - defined(HEDLEY_TI_ARMCL_VERSION) || \ - HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ - defined(HEDLEY_TI_CL2000_VERSION) || \ - defined(HEDLEY_TI_CL6X_VERSION) || \ - defined(HEDLEY_TI_CL7X_VERSION) || \ - defined(HEDLEY_TI_CLPRU_VERSION) || \ - defined(__clang__) -# define HEDLEY_IS_CONSTEXPR_(expr) ( \ - sizeof(void) != \ - sizeof(*( \ - 1 ? \ - ((void*) ((expr) * 0L) ) : \ - ((struct { char v[sizeof(void) * 2]; } *) 1) \ - ) \ - ) \ - ) -# endif -#endif -#if defined(HEDLEY_IS_CONSTEXPR_) -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) -#else -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) (0) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(HEDLEY_BEGIN_C_DECLS) -# undef HEDLEY_BEGIN_C_DECLS -#endif -#if defined(HEDLEY_END_C_DECLS) -# undef HEDLEY_END_C_DECLS -#endif -#if defined(HEDLEY_C_DECL) -# undef HEDLEY_C_DECL -#endif -#if defined(__cplusplus) -# define HEDLEY_BEGIN_C_DECLS extern "C" { -# define HEDLEY_END_C_DECLS } -# define HEDLEY_C_DECL extern "C" -#else -# define HEDLEY_BEGIN_C_DECLS -# define HEDLEY_END_C_DECLS -# define HEDLEY_C_DECL -#endif - -#if defined(HEDLEY_STATIC_ASSERT) -# undef HEDLEY_STATIC_ASSERT -#endif -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) -# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#else -# define HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(HEDLEY_NULL) -# undef HEDLEY_NULL -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) -# elif defined(NULL) -# define HEDLEY_NULL NULL -# else -# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) -# endif -#elif defined(NULL) -# define HEDLEY_NULL NULL -#else -# define HEDLEY_NULL ((void*) 0) -#endif - -#if defined(HEDLEY_MESSAGE) -# undef HEDLEY_MESSAGE -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_MESSAGE(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(message msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_WARNING) -# undef HEDLEY_WARNING -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_WARNING(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(clang warning msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_REQUIRE) -# undef HEDLEY_REQUIRE -#endif -#if defined(HEDLEY_REQUIRE_MSG) -# undef HEDLEY_REQUIRE_MSG -#endif -#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) -# if HEDLEY_HAS_WARNING("-Wgcc-compat") -# define HEDLEY_REQUIRE(expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# define HEDLEY_REQUIRE_MSG(expr,msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) -# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) -# endif -#else -# define HEDLEY_REQUIRE(expr) -# define HEDLEY_REQUIRE_MSG(expr,msg) -#endif - -#if defined(HEDLEY_FLAGS) -# undef HEDLEY_FLAGS -#endif -#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) -# define HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else -# define HEDLEY_FLAGS -#endif - -#if defined(HEDLEY_FLAGS_CAST) -# undef HEDLEY_FLAGS_CAST -#endif -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) -# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)") \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(HEDLEY_EMPTY_BASES) -# undef HEDLEY_EMPTY_BASES -#endif -#if \ - (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else -# define HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. */ - -#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) -# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) -#else -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_BUILTIN) -# undef HEDLEY_CLANG_HAS_BUILTIN -#endif -#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) - -#if defined(HEDLEY_CLANG_HAS_FEATURE) -# undef HEDLEY_CLANG_HAS_FEATURE -#endif -#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) - -#if defined(HEDLEY_CLANG_HAS_EXTENSION) -# undef HEDLEY_CLANG_HAS_EXTENSION -#endif -#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) - -#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_WARNING) -# undef HEDLEY_CLANG_HAS_WARNING -#endif -#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ -/* :: End simde/hedley.h :: */ - -#define SIMDE_VERSION_MAJOR 0 -#define SIMDE_VERSION_MINOR 8 -#define SIMDE_VERSION_MICRO 0 -#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) -// Also update meson.build in the root directory of the repository - -#include -#include - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin 
simde/simde-detect-clang.h :: */ -/* Detect Clang Version - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -/* This file was originally part of SIMDe - * (). You're free to do with it as - * you please, but I do have a few small requests: - * - * * If you make improvements, please submit them back to SIMDe - * (at ) so others can - * benefit from them. - * * Please keep a link to SIMDe intact so people know where to submit - * improvements. - * * If you expose it publicly, please change the SIMDE_ prefix to - * something specific to your project. - * - * The version numbers clang exposes (in the ___clang_major__, - * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. - * Vendors such as Apple will define these values to their version - * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but - * __clang_major__ and __clang_minor__ are defined to 4 and 0 - * respectively, instead of 3 and 1. - * - * The solution is *usually* to use clang's feature detection macros - * () - * to determine if the feature you're interested in is available. This - * generally works well, and it should probably be the first thing you - * try. Unfortunately, it's not possible to check for everything. In - * particular, compiler bugs. - * - * This file just uses the feature checking macros to detect features - * added in specific versions of clang to identify which version of - * clang the compiler is based on. - * - * Right now it only goes back to 3.6, but I'm happy to accept patches - * to go back further. And, of course, newer versions are welcome if - * they're not already present, and if you find a way to detect a point - * release that would be great, too! - */ - -#if !defined(SIMDE_DETECT_CLANG_H) -#define SIMDE_DETECT_CLANG_H 1 - -/* Attempt to detect the upstream clang version number. I usually only - * worry about major version numbers (at least for 4.0+), but if you - * need more resolution I'm happy to accept patches that are able to - * detect minor versions as well. That said, you'll probably have a - * hard time with detection since AFAIK most minor releases don't add - * anything we can detect. Updated based on - * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 - * - would welcome patches/updates there as well. 
- */ - -#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -# if __has_attribute(unsafe_buffer_usage) // no new warnings in 17.0 -# define SIMDE_DETECT_CLANG_VERSION 170000 -# elif __has_attribute(nouwtable) // no new warnings in 16.0 -# define SIMDE_DETECT_CLANG_VERSION 160000 -# elif __has_warning("-Warray-parameter") -# define SIMDE_DETECT_CLANG_VERSION 150000 -# elif __has_warning("-Wbitwise-instead-of-logical") -# define SIMDE_DETECT_CLANG_VERSION 140000 -# elif __has_warning("-Waix-compat") -# define SIMDE_DETECT_CLANG_VERSION 130000 -# elif __has_warning("-Wformat-insufficient-args") -# define SIMDE_DETECT_CLANG_VERSION 120000 -# elif __has_warning("-Wimplicit-const-int-float-conversion") -# define SIMDE_DETECT_CLANG_VERSION 110000 -# elif __has_warning("-Wmisleading-indentation") -# define SIMDE_DETECT_CLANG_VERSION 100000 -# elif defined(__FILE_NAME__) -# define SIMDE_DETECT_CLANG_VERSION 90000 -# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) -# define SIMDE_DETECT_CLANG_VERSION 80000 -// For reasons unknown, Xcode 10.3 (Apple LLVM version 10.0.1) is apparently -// based on Clang 7, but does not support the warning we test. -// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and -// https://trac.macports.org/wiki/XcodeVersionInfo. -# elif __has_warning("-Wc++98-compat-extra-semi") || \ - (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) -# define SIMDE_DETECT_CLANG_VERSION 70000 -# elif __has_warning("-Wpragma-pack") -# define SIMDE_DETECT_CLANG_VERSION 60000 -# elif __has_warning("-Wbitfield-enum-conversion") -# define SIMDE_DETECT_CLANG_VERSION 50000 -# elif __has_attribute(diagnose_if) -# define SIMDE_DETECT_CLANG_VERSION 40000 -# elif __has_warning("-Wcomma") -# define SIMDE_DETECT_CLANG_VERSION 39000 -# elif __has_warning("-Wdouble-promotion") -# define SIMDE_DETECT_CLANG_VERSION 38000 -# elif __has_warning("-Wshift-negative-value") -# define SIMDE_DETECT_CLANG_VERSION 37000 -# elif __has_warning("-Wambiguous-ellipsis") -# define SIMDE_DETECT_CLANG_VERSION 36000 -# else -# define SIMDE_DETECT_CLANG_VERSION 1 -# endif -#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ - -/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty - * straightforward; it returns true if the compiler is a derivative - * of clang >= the specified version. - * - * Since this file is often (primarily?) useful for working around bugs - * it is also helpful to have a macro which returns true if only if the - * compiler is a version of clang *older* than the specified version to - * make it a bit easier to ifdef regions to add code for older versions, - * such as pragmas to disable a specific warning. 
*/ - -#if defined(SIMDE_DETECT_CLANG_VERSION) -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) -#else -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) -#endif - -#endif /* !defined(SIMDE_DETECT_CLANG_H) */ -/* :: End simde/simde-detect-clang.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-arch.h :: */ -/* Architecture detection - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - * Different compilers define different preprocessor macros for the - * same architecture. This is an attempt to provide a single - * interface which is usable on any compiler. - * - * In general, a macro named SIMDE_ARCH_* is defined for each - * architecture the CPU supports. When there are multiple possible - * versions, we try to define the macro to the target version. For - * example, if you want to check for i586+, you could do something - * like: - * - * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) - * ... - * #endif - * - * You could also just check that SIMDE_ARCH_X86 >= 5 without checking - * if it's defined first, but some compilers may emit a warning about - * an undefined macro being used (e.g., GCC with -Wundef). - * - * This was originally created for SIMDe - * (hence the prefix), but this - * header has no dependencies and may be used anywhere. It is - * originally based on information from - * , though it - * has been enhanced with additional information. - * - * If you improve this file, or find a bug, please file the issue at - * . If you copy this into - * your project, even if you change the prefix, please keep the links - * to SIMDe intact so others know where to report issues, submit - * enhancements, and find the latest version. 
*/ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -# if defined(__alpha_ev6__) -# define SIMDE_ARCH_ALPHA 6 -# elif defined(__alpha_ev5__) -# define SIMDE_ARCH_ALPHA 5 -# elif defined(__alpha_ev4__) -# define SIMDE_ARCH_ALPHA 4 -# else -# define SIMDE_ARCH_ALPHA 1 -# endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -# define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -# define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -# if !defined(_M_ARM64EC) -# define SIMDE_ARCH_AMD64 1000 -# endif -#endif - -/* ARM - */ -#if defined(__ARM_ARCH) -# if __ARM_ARCH > 100 -# define SIMDE_ARCH_ARM (__ARM_ARCH) -# else -# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) -# endif -#elif defined(_M_ARM) -# if _M_ARM > 100 -# define SIMDE_ARCH_ARM (_M_ARM) -# else -# define SIMDE_ARCH_ARM (_M_ARM * 100) -# endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_ARM 800 -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -# define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) -#else -# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -# define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -# elif defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -# endif -#endif -#if defined(__ARM_FEATURE_SVE) -# define SIMDE_ARCH_ARM_SVE -#endif -#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA -# define SIMDE_ARCH_ARM_FMA -#endif -#if defined(__ARM_FEATURE_CRYPTO) -# define SIMDE_ARCH_ARM_CRYPTO -#endif -#if defined(__ARM_FEATURE_QRDMX) -# define SIMDE_ARCH_ARM_QRDMX -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -# define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -# define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) -# define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -# define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -# define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -# define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -# define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -# define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -# define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -# define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -# define 
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
*/ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. 
*/ -#if \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ -#endif - -/* This warning needs a lot of work. It is triggered if all you do is - * pass the value to memcpy/__builtin_memcpy, or if you initialize a - * member of the union, even if that member takes up the entire union. - * Last tested with clang-10, hopefully things will improve in the - * future; if clang fixes this I'd love to enable it. */ -#if \ - HEDLEY_HAS_WARNING("-Wconditional-uninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ -#endif - -/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which - * will is false. However, SIMDe uses these operations exclusively - * for things like _mm_cmpeq_ps, for which we really do want to check - * for equality (or inequality). - * - * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro - * which just wraps a check in some code do disable this diagnostic I'd - * be happy to accept it. */ -#if \ - HEDLEY_HAS_WARNING("-Wfloat-equal") || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ -#endif - -/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. - * If Hedley can't find an implementation it will preprocess to - * nothing, which means there will be a trailing semi-colon. */ -#if HEDLEY_HAS_WARNING("-Wextra-semi") - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") -#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ -#endif - -/* We do use a few variadic macros, which technically aren't available - * until C99 and C++11, but every compiler I'm aware of has supported - * them for much longer. That said, usage is isolated to the test - * suite and compilers known to support them. */ -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) - #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#endif - -/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro - * before we can access certain SIMD intrinsics, but this diagnostic - * warns about it being a reserved name. It is a reserved name, but - * it's reserved for the compiler and we are using it to convey - * information to the compiler. - * - * This is also used when enabling native aliases since we don't get to - * choose the macro names. 
*/ -#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#endif - -/* Similar to above; types like simde__m128i are reserved due to the - * double underscore, but we didn't choose them, Intel did. */ -#if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ -#endif - -/* clang 3.8 warns about the packed attribute being unnecessary when - * used in the _mm_loadu_* functions. That *may* be true for version - * 3.8, but for later versions it is crucial in order to make unaligned - * access safe. */ -#if HEDLEY_HAS_WARNING("-Wpacked") - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ -#endif - -/* Triggered when assigning a float to a double implicitly. We use - * explicit casts in SIMDe, this is only used in the test suite. */ -#if HEDLEY_HAS_WARNING("-Wdouble-promotion") - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -/* Several compilers treat conformant array parameters as VLAs. We - * test to make sure we're in C mode (C++ doesn't support CAPs), and - * that the version of the standard supports CAPs. We also reject - * some buggy compilers like MSVC (the logic is in Hedley if you want - * to take a look), but with certain warnings enabled some compilers - * still like to emit a diagnostic. */ -#if HEDLEY_HAS_WARNING("-Wvla") - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ -#endif - -/* If you add an unused attribute to a function and don't use it, clang - * may emit this. 
*/ -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpass-failed") - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpadded") - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ -#endif - -#if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ -#endif - -#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ -#endif - -/* clang will emit this warning when we use C99 extensions whan not in - * C99 mode, even though it does support this. In such cases we check - * the compiler and version first, so we know it's not a problem. */ -#if HEDLEY_HAS_WARNING("-Wc99-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -#endif - -/* Similar problm as above; we rely on some basic C99 support, but clang - * has started warning obut this even in C17 mode with -Weverything. */ -#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ -#endif - -/* https://github.com/simd-everywhere/simde/issues/277 */ -#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ -#endif - -/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS - * to silence, but you have to do that before including anything and - * that would require reordering includes. */ -#if defined(_MSC_VER) - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ -#endif - -/* Some compilers, such as clang, may use `long long` for 64-bit - * integers, but `long long` triggers a diagnostic with - * -Wc++98-compat-pedantic which says 'long long' is incompatible with - * C++98. 
*/ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ - _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ -#endif - -/* Some problem as above */ -#if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ -#endif - -/* emscripten emits this whenever stdin/stdout/stderr is used in a - * macro. */ -#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ -#endif - -/* Clang uses C11 generic selections to implement some AltiVec - * functions, which triggers this diagnostic when not compiling - * in C11 mode */ -#if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ -#endif - -/* Clang sometimes triggers this warning in macros in the AltiVec and - * NEON headers, or due to missing functions. */ -#if HEDLEY_HAS_WARNING("-Wvector-conversion") - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") - /* For NEON, the situation with -Wvector-conversion in clang < 10 is - * bad enough that we just disable the warning altogether. On x86, - * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ - #if \ - (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ - SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ -#endif - -/* Prior to 5.0, clang didn't support disabling diagnostics in - * statement exprs. As a result, some macros we use don't - * properly silence warnings. 
*/ -#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ -#endif - -/* SLEEF triggers this a *lot* in their headers */ -#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#endif - -/* GCC emits this under some circumstances when using __int128 */ -#if HEDLEY_GCC_VERSION_CHECK(4,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -#endif - -/* MSVC doesn't like (__assume(0), code) and will warn about code being - * unreachable, but we want it there because not all compilers - * understand the unreachable macro and will complain if it is missing. - * I'm planning on adding a new macro to Hedley to handle this a bit - * more elegantly, but until then... */ -#if defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) -#elif defined(__clang__) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ -#endif - -/* This is a false positive from GCC in a few places. */ -#if HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif - -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#else - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ -#endif - -/* Some native functions on E2K with instruction set < v6 are declared - * as deprecated due to inefficiency. Still they are more efficient - * than SIMDe implementation. So we're using them, and switching off - * these deprecation warnings. 
*/ -#if defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") -#else -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS -#endif - -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ - HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ - SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ - SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ - SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ - SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ - SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ - SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ - -#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ -/* :: End simde/simde-diagnostic.h :: */ - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SVML) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) - #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) - #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BITALG) - #define SIMDE_X86_AVX512BITALG_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI) - #define SIMDE_X86_AVX512VBMI_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI2) - #define SIMDE_X86_AVX512VBMI2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VNNI) - #define SIMDE_X86_AVX512VNNI_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) - #define SIMDE_X86_AVX5124VNNIW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512CD) - #define SIMDE_X86_AVX512CD_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512DQ) - #define SIMDE_X86_AVX512DQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VL) - #define SIMDE_X86_AVX512VL_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BW) - #define SIMDE_X86_AVX512BW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && !defined(SIMDE_X86_AVX512FP16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512FP16) - #define SIMDE_X86_AVX512FP16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BF16) - #define SIMDE_X86_AVX512BF16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512F) - #define SIMDE_X86_AVX512F_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_NATIVE -#endif - -#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_FMA) - #define SIMDE_X86_FMA_NATIVE - #endif -#endif -#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX2) - #define SIMDE_X86_AVX2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX) - #define SIMDE_X86_AVX_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_XOP) - #define SIMDE_X86_XOP_NATIVE - #endif -#endif -#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_2) - #define SIMDE_X86_SSE4_2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_1) - #define SIMDE_X86_SSE4_1_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSSE3) - #define SIMDE_X86_SSSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE3) - #define SIMDE_X86_SSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_AES_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AES) - #define SIMDE_X86_AES_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE2) - #define SIMDE_X86_SSE2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE) - #define SIMDE_X86_SSE_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_MMX) - #define SIMDE_X86_MMX_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_GFNI) - #define SIMDE_X86_GFNI_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_PCLMUL) - #define SIMDE_X86_PCLMUL_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) - #define SIMDE_X86_VPCLMULQDQ_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_F16C) - #define SIMDE_X86_F16C_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86) && 
(defined(__INTEL_COMPILER) || (HEDLEY_MSVC_VERSION_CHECK(14, 20, 0) && !defined(__clang__))) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(push) - #pragma warning(disable:4799) -#endif - -#if \ - defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) || defined(SIMDE_X86_SVML_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_1_NATIVE) - #include -#elif defined(SIMDE_X86_SSSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE_NATIVE) - #include -#elif defined(SIMDE_X86_MMX_NATIVE) - #include -#endif - -#if defined(SIMDE_X86_XOP_NATIVE) - #if defined(_MSC_VER) - #include - #else - #include - #endif -#endif - -#if defined(SIMDE_X86_AES_NATIVE) - #include -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(pop) -#endif - -#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) - #define SIMDE_ARM_NEON_A64V8_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) - #define SIMDE_ARM_NEON_A32V8_NATIVE - #endif -#endif -#if defined(__ARM_ACLE) - #include -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) - #define SIMDE_ARM_NEON_A32V7_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include - #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - #include - #endif -#endif - -#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_SVE) - #define SIMDE_ARM_SVE_NATIVE - #include - #endif -#endif - -#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_SIMD128) - #define SIMDE_WASM_SIMD128_NATIVE - #endif -#endif - -#if !defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) && !defined(SIMDE_WASM_RELAXED_SIMD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_RELAXED_SIMD) - #define SIMDE_WASM_RELAXED_SIMD_NATIVE - #endif -#endif -#if defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) - #include -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) - #define SIMDE_POWER_ALTIVEC_P9_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) - #define 
SIMDE_POWER_ALTIVEC_P7_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) - #define SIMDE_POWER_ALTIVEC_P7_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_15_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_14_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_13_NATIVE - #endif -#endif - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - /* AltiVec conflicts with lots of stuff. The bool keyword conflicts - * with the bool keyword in C++ and the bool macro in C99+ (defined - * in stdbool.h). The vector keyword conflicts with std::vector in - * C++ if you are `using std;`. - * - * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` - * instead, but altivec.h will unconditionally define - * `vector`/`bool`/`pixel` so we need to work around that. - * - * Unfortunately this means that if your code uses AltiVec directly - * it may break. If this is the case you'll want to define - * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even - * better, port your code to use the double-underscore versions. */ - #if defined(bool) - #undef bool - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #include - - #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #if defined(vector) - #undef vector - #endif - #if defined(pixel) - #undef pixel - #endif - #if defined(bool) - #undef bool - #endif - #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #include - #endif - - /* Use these intsead of vector/pixel/bool in SIMDe. 
*/ - #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T - #define SIMDE_POWER_ALTIVEC_PIXEL __pixel - #define SIMDE_POWER_ALTIVEC_BOOL __bool - - /* Re-define bool if we're using stdbool.h */ - #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #define bool _Bool - #endif -#endif - -#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) - #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_MSA) - #define SIMDE_MIPS_MSA_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_MSA_NATIVE) - #include -#endif - -/* This is used to determine whether or not to fall back on a vector - * function in an earlier ISA extensions, as well as whether - * we expected any attempts at vectorization to be fruitful or if we - * expect to always be running serial code. - * - * Note that, for some architectures (okay, *one* architecture) there - * can be a split where some types are supported for one vector length - * but others only for a shorter length. Therefore, it is possible to - * provide separate values for float/int/double types. */ - -#if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (512) - #elif defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (256) - #elif defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || \ - defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ - defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (128) - #elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) - #endif - - #if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE - #else - #define SIMDE_NATURAL_VECTOR_SIZE (0) - #endif - #endif - - #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif -#endif - -#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) 
((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) - -/* Native aliases */ -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_FMA_NATIVE) - #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VL_NATIVE) - #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) - #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) - #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BW_NATIVE) - #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) - #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) - #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BF16_NATIVE) - #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) - #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) - #define SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512DQ_NATIVE) - #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512CD_NATIVE) - #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512FP16_NATIVE) - #define SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_GFNI_NATIVE) - #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_PCLMUL_NATIVE) - #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) - #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_F16C_NATIVE) - #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AES_NATIVE) - #define 
SIMDE_X86_AES_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SVML_NATIVE) - #define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_SVE_NATIVE) - #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_WASM_SIMD128_NATIVE) - #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES - #endif -#endif - -/* Are floating point values stored using IEEE 754? Knowing - * this at during preprocessing is a bit tricky, mostly because what - * we're curious about is how values are stored and not whether the - * implementation is fully conformant in terms of rounding, NaN - * handling, etc. - * - * For example, if you use -ffast-math or -Ofast on - * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 - * support is not advertised (by defining __STDC_IEC_559__). - * - * However, what we care about is whether it is safe to assume that - * floating point values are stored in IEEE 754 format, in which case - * we can provide faster implementations of some functions. - * - * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- - * so we just assume IEEE 754 for now. There is a test which verifies - * this, if that test fails sowewhere please let us know and we'll add - * an exception for that platform. Meanwhile, you can define - * SIMDE_NO_IEEE754_STORAGE. */ -#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) - #define SIMDE_IEEE754_STORAGE -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_FP16) - #define SIMDE_ARM_NEON_FP16 -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_BF16) - #define SIMDE_ARM_NEON_BF16 -#endif - -#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LASX) - #define SIMDE_LOONGARCH_LASX_NATIVE - #endif -#endif - -#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LSX) - #define SIMDE_LOONGARCH_LSX_NATIVE - #endif -#endif - -#if defined(SIMDE_LOONGARCH_LASX_NATIVE) - #include -#endif -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - #include -#endif - -#endif /* !defined(SIMDE_FEATURES_H) */ -/* :: End simde/simde-features.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-math.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -/* Attempt to find math functions. Functions may be in , - * , compiler built-ins/intrinsics, or platform/architecture - * specific headers. In some cases, especially those not built in to - * libm, we may need to define our own implementations. */ - -#if !defined(SIMDE_MATH_H) -#define SIMDE_MATH_H 1 - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#include -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -/* SLEEF support - * https://sleef.org/ - * - * If you include prior to including SIMDe, SIMDe will use - * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to - * including SIMDe to force the issue. - * - * Note that SLEEF does requires linking to libsleef. - * - * By default, SIMDe will use the 1 ULP functions, but if you use - * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is - * only the case for the simde_math_* functions; for code in other - * SIMDe headers which calls SLEEF directly we may use functions with - * greater error if the API we're implementing is less precise (for - * example, SVML guarantees 4 ULP, so we will generally use the 3.5 - * ULP functions from SLEEF). */ -#if !defined(SIMDE_MATH_SLEEF_DISABLE) - #if defined(__SLEEF_H__) - #define SIMDE_MATH_SLEEF_ENABLE - #endif -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ - #include - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) - #if defined(SLEEF_VERSION_MAJOR) - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #endif -#else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(__has_builtin) - #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) - #define SIMDE_MATH_BUILTIN_LIBM(func) (1) -#else - #define SIMDE_MATH_BUILTIN_LIBM(func) (0) -#endif - -#if defined(HUGE_VAL) - /* Looks like or has already been included. */ - - /* The math.h from libc++ (yes, the C header from the C++ standard - * library) will define an isnan function, but not an isnan macro - * like the C standard requires. So we detect the header guards - * macro libc++ uses. 
*/ - #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) - #define SIMDE_MATH_HAVE_MATH_H - #elif defined(__cplusplus) - #define SIMDE_MATH_HAVE_CMATH - #endif -#elif defined(__has_include) - #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() - #define SIMDE_MATH_HAVE_CMATH - #include - #elif __has_include() - #define SIMDE_MATH_HAVE_MATH_H - #include - #elif !defined(SIMDE_MATH_NO_LIBM) - #define SIMDE_MATH_NO_LIBM - #endif -#elif !defined(SIMDE_MATH_NO_LIBM) - #if defined(__cplusplus) && (__cplusplus >= 201103L) - #define SIMDE_MATH_HAVE_CMATH - HEDLEY_DIAGNOSTIC_PUSH - #if defined(HEDLEY_MSVC_VERSION) - /* VS 14 emits this diagnostic about noexcept being used on a - * function, which we can't do anything about. */ - #pragma warning(disable:4996) - #endif - #include - HEDLEY_DIAGNOSTIC_POP - #else - #define SIMDE_MATH_HAVE_MATH_H - #include - #endif -#endif - -#if !defined(SIMDE_MATH_INFINITY) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_INFINITY (__builtin_inf()) - #elif defined(INFINITY) - #define SIMDE_MATH_INFINITY INFINITY - #endif -#endif - -#if !defined(SIMDE_INFINITYF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inff) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_INFINITYF (__builtin_inff()) - #elif defined(INFINITYF) - #define SIMDE_MATH_INFINITYF INFINITYF - #elif defined(SIMDE_MATH_INFINITY) - #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) - #endif -#endif - -#if !defined(SIMDE_MATH_NAN) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nan) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_NAN (__builtin_nan("")) - #elif defined(NAN) - #define SIMDE_MATH_NAN NAN - #endif -#endif - -#if !defined(SIMDE_NANF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_NANF (__builtin_nanf("")) - #elif defined(NANF) - #define SIMDE_MATH_NANF NANF - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) - #endif -#endif - -#if !defined(SIMDE_MATH_PI) - #if defined(M_PI) - #define SIMDE_MATH_PI M_PI - #else - #define SIMDE_MATH_PI 3.14159265358979323846 - #endif -#endif - -#if !defined(SIMDE_MATH_PIF) - #if defined(M_PI) - #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) - #else - #define SIMDE_MATH_PIF 3.14159265358979323846f - #endif -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180) - #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180F) - #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f -#endif - -#if !defined(SIMDE_MATH_180_OVER_PI) - #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 -#endif - -#if !defined(SIMDE_MATH_180_OVER_PIF) - #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f -#endif - -#if 
!defined(SIMDE_MATH_FLT_MIN) - #if defined(__FLT_MIN__) - #define SIMDE_MATH_FLT_MIN __FLT_MIN__ - #else - #if !defined(FLT_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MIN FLT_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_FLT_MAX) - #if defined(__FLT_MAX__) - #define SIMDE_MATH_FLT_MAX __FLT_MAX__ - #else - #if !defined(FLT_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MAX FLT_MAX - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MIN) - #if defined(__DBL_MIN__) - #define SIMDE_MATH_DBL_MIN __DBL_MIN__ - #else - #if !defined(DBL_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MIN DBL_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MAX) - #if defined(__DBL_MAX__) - #define SIMDE_MATH_DBL_MAX __DBL_MAX__ - #else - #if !defined(DBL_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MAX DBL_MAX - #endif -#endif - -/*** Classification macros from C99 ***/ - -#if !defined(simde_math_isinf) - #if SIMDE_MATH_BUILTIN_LIBM(isinf) - #define simde_math_isinf(v) __builtin_isinf(v) - #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isinf(v) isinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinf(v) std::isinf(v) - #endif -#endif - -#if !defined(simde_math_isinff) - #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define simde_math_isinff(v) __builtin_isinff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinff(v) std::isinf(v) - #elif defined(simde_math_isinf) - #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnan) - #if SIMDE_MATH_BUILTIN_LIBM(isnan) - #define simde_math_isnan(v) __builtin_isnan(v) - #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnan(v) isnan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnan(v) std::isnan(v) - #endif -#endif - -#if !defined(simde_math_isnanf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ - #define simde_math_isnanf(v) __builtin_isnanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnanf(v) std::isnan(v) - #elif defined(simde_math_isnan) - #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnormal) - #if SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormal(v) __builtin_isnormal(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormal(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormal(v) std::isnormal(v) - #endif -#endif - -#if !defined(simde_math_isnormalf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) - #define simde_math_isnormalf(v) __builtin_isnormalf(v) - #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormalf(v) __builtin_isnormal(v) - #elif defined(isnormalf) - #define simde_math_isnormalf(v) isnormalf(v) - #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormalf(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormalf(v) std::isnormal(v) - #elif defined(simde_math_isnormal) - #define simde_math_isnormalf(v) 
simde_math_isnormal(v) - #endif -#endif - -#if !defined(simde_math_issubnormalf) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) - #endif -#endif - -#if !defined(simde_math_issubnormal) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormal(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) - #endif -#endif - -#if defined(FP_NAN) - #define SIMDE_MATH_FP_NAN FP_NAN -#else - #define SIMDE_MATH_FP_NAN 0 -#endif -#if defined(FP_INFINITE) - #define SIMDE_MATH_FP_INFINITE FP_INFINITE -#else - #define SIMDE_MATH_FP_INFINITE 1 -#endif -#if defined(FP_ZERO) - #define SIMDE_MATH_FP_ZERO FP_ZERO -#else - #define SIMDE_MATH_FP_ZERO 2 -#endif -#if defined(FP_SUBNORMAL) - #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL -#else - #define SIMDE_MATH_FP_SUBNORMAL 3 -#endif -#if defined(FP_NORMAL) - #define SIMDE_MATH_FP_NORMAL FP_NORMAL -#else - #define SIMDE_MATH_FP_NORMAL 4 -#endif - -static HEDLEY_INLINE -int -simde_math_fpclassifyf(float v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0f) ? SIMDE_MATH_FP_ZERO : - simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -static HEDLEY_INLINE -int -simde_math_fpclassify(double v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0) ? SIMDE_MATH_FP_ZERO : - simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -#define SIMDE_MATH_FP_QNAN 0x01 -#define SIMDE_MATH_FP_PZERO 0x02 -#define SIMDE_MATH_FP_NZERO 0x04 -#define SIMDE_MATH_FP_PINF 0x08 -#define SIMDE_MATH_FP_NINF 0x10 -#define SIMDE_MATH_FP_DENORMAL 0x20 -#define SIMDE_MATH_FP_NEGATIVE 0x40 -#define SIMDE_MATH_FP_SNAN 0x80 - -static HEDLEY_INLINE -uint8_t -simde_math_fpclassf(float v, const int imm8) { - union { - float f; - uint32_t u; - } fu; - fu.f = v; - uint32_t bits = fu.u; - uint8_t NegNum = (bits >> 31) & 1; - uint32_t const ExpMask = 0x3F800000; // [30:23] - uint32_t const MantMask = 0x007FFFFF; // [22:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 22) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -static HEDLEY_INLINE -uint8_t -simde_math_fpclass(double v, const int imm8) { - union { - double d; - uint64_t u; - } du; - du.d = v; - uint64_t bits = du.u; - uint8_t NegNum = (bits >> 63) & 1; - uint64_t const ExpMask = 0x3FF0000000000000; // [62:52] - uint64_t const MantMask = 0x000FFFFFFFFFFFFF; // [51:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 51) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -/*** Manipulation functions ***/ - -#if !defined(simde_math_nextafter) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafter(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafter(x, y) 
nextafter(x, y) - #endif -#endif - -#if !defined(simde_math_nextafterf) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafterf(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafterf(x, y) nextafterf(x, y) - #endif -#endif - -/*** Functions from C99 ***/ - -#if !defined(simde_math_abs) - #if SIMDE_MATH_BUILTIN_LIBM(abs) - #define simde_math_abs(v) __builtin_abs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_abs(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_abs(v) abs(v) - #endif -#endif - -#if !defined(simde_math_labs) - #if SIMDE_MATH_BUILTIN_LIBM(labs) - #define simde_math_labs(v) __builtin_labs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_labs(v) std::labs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_labs(v) labs(v) - #endif -#endif - -#if !defined(simde_math_llabs) - #if SIMDE_MATH_BUILTIN_LIBM(llabs) - #define simde_math_llabs(v) __builtin_llabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_llabs(v) std::llabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_llabs(v) llabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_acos) - #if SIMDE_MATH_BUILTIN_LIBM(acos) - #define simde_math_acos(v) __builtin_acos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acos(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acos(v) acos(v) - #endif -#endif - -#if !defined(simde_math_acosf) - #if SIMDE_MATH_BUILTIN_LIBM(acosf) - #define simde_math_acosf(v) __builtin_acosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosf(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosf(v) acosf(v) - #endif -#endif - -#if !defined(simde_math_acosh) - #if SIMDE_MATH_BUILTIN_LIBM(acosh) - #define simde_math_acosh(v) __builtin_acosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosh(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosh(v) acosh(v) - #endif -#endif - -#if !defined(simde_math_acoshf) - #if SIMDE_MATH_BUILTIN_LIBM(acoshf) - #define simde_math_acoshf(v) __builtin_acoshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acoshf(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acoshf(v) acoshf(v) - #endif -#endif - -#if !defined(simde_math_asin) - #if SIMDE_MATH_BUILTIN_LIBM(asin) - #define simde_math_asin(v) __builtin_asin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asin(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asin(v) asin(v) - #endif -#endif - -#if !defined(simde_math_asinf) - #if SIMDE_MATH_BUILTIN_LIBM(asinf) - #define simde_math_asinf(v) __builtin_asinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinf(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinf(v) asinf(v) - #endif -#endif - -#if 
[Removed vendored SIMDe code (continued): the remainder of simde-math.h's scalar libm portability wrappers. For each of asinh, atan, atan2, atanh, cbrt, ceil, copysign, signbit, cos, cosh, erf, erfc, exp, expm1, exp2, fabs, floor, fma, fmax, hypot, log, logb, log1p, log2, log10, modf, nearbyint, pow, rint, round, sin, sinh, sqrt, tan, tanh and trunc (plus the float "f" variants, a long-double sqrtl, and an optional Sleef path for cosf), a simde_math_* macro selects the compiler builtin when SIMDE_MATH_BUILTIN_LIBM reports one, otherwise the std:: function from <cmath>, otherwise the plain C function from <math.h>. exp10/exp10f fall back to pow(10.0, v)/powf(10.0f, v) when no builtin is available, roundeven/roundevenf are emulated from round and fabs with a ties-to-even correction when __builtin_roundeven is missing, and simde_math_isunordered/isunorderedf provide NaN-aware comparison without raising invalid-operation errors.]
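All of these wrappers share the same three-level preprocessor fallback; a minimal sketch of that shape (the macro name and feature tests below are illustrative, not the removed header's exact spelling):

    /* Prefer the compiler builtin, then C++ <cmath>, then C <math.h>. */
    #if defined(__GNUC__) || defined(__clang__)
      #define my_math_expm1(v) __builtin_expm1(v)
    #elif defined(__cplusplus)
      #include <cmath>
      #define my_math_expm1(v) std::expm1(v)
    #else
      #include <math.h>
      #define my_math_expm1(v) expm1(v)
    #endif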
[Removed vendored SIMDe code (continued): simde-math.h's "additional functions not in libm". simde_math_cdfnorm/cdfnormf compute the standard normal CDF with the Abramowitz & Stegun 7.1.26 polynomial approximation of erf (public-domain code adapted from John D. Cook). simde_math_cdfnorminv/cdfnorminvf invert the normal CDF with a piecewise rational approximation using Peter Acklam's coefficients (separate lower-tail, central and upper-tail branches; +/-infinity at p = 0 or 1, and 0 outside [0, 1]). simde_math_erfinv/erfinvf use the Winitzki-style formula sgn(x) * sqrt(-t1 + sqrt(t1^2 - t2)), with t1 = 2/(pi*a) + ln(1 - x^2)/2 and t2 = ln(1 - x^2)/a; the accompanying comment notes that a = 0.14829094707965850830078125 was preferred over the commonly quoted 0.147 because it gives a lower average absolute error, while 0.15449436008930206298828125 would minimize the maximum error. simde_math_erfcinv/erfcinvf delegate to erfinv(1 - x) for moderate arguments, switch to rational approximations in t = 1/sqrt(-log x) for very small x, and return +/-infinity otherwise.]
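The normal-CDF approximation summarized above is self-contained and easy to restate in standard C++; the sketch below mirrors the removed simde_math_cdfnorm (the function name is illustrative, and the quoted accuracy is the published bound for the underlying A&S 7.1.26 polynomial):

    #include <cmath>

    // Standard normal CDF via the Abramowitz & Stegun 7.1.26 polynomial
    // approximation of erf (maximum absolute error about 1.5e-7).
    double cdfnorm_as7126(double x) {
        const double a1 =  0.254829592, a2 = -0.284496736, a3 = 1.421413741,
                     a4 = -1.453152027, a5 =  1.061405429, p  = 0.3275911;
        const bool negative = x < 0.0;
        x = std::fabs(x) / std::sqrt(2.0);
        const double t = 1.0 / (1.0 + p * x);
        const double y = 1.0 -
            (((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t) * std::exp(-x * x);
        return 0.5 * (1.0 + (negative ? -y : y));
    }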
[Removed vendored SIMDe code (continued): the tail of simde_math_erfcinvf, the degree/radian helpers simde_math_rad2deg(f) and simde_math_deg2rad(f), and scalar saturated integer arithmetic. simde_math_adds_{i8,i16,i32,i64,u8,u16,u32,u64} and the matching simde_math_subs_* helpers use the AArch64 NEON scalar intrinsics (vqaddb_s8, vqadds_u32, vqsubd_s64, ...) when SIMDE_ARM_NEON_A64V8_NATIVE is defined and otherwise clamp overflow and underflow with branch-free bit manipulation.]
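The branch-free unsigned fallbacks are compact enough to restate in plain C++; the sketch below follows the pattern of the removed simde_math_adds_u32/simde_math_subs_u32 (the function names are illustrative, not package API):

    #include <cstdint>

    // Saturating unsigned 32-bit add: on wrap-around (r < a) the mask becomes
    // 0xFFFFFFFF and the result is forced to UINT32_MAX.
    static inline uint32_t adds_u32(uint32_t a, uint32_t b) {
        uint32_t r = a + b;
        r |= -static_cast<uint32_t>(r < a);
        return r;
    }

    // Saturating unsigned 32-bit subtract: when b > a the mask becomes 0 and
    // the result is clamped to 0.
    static inline uint32_t subs_u32(uint32_t a, uint32_t b) {
        uint32_t r = a - b;
        r &= -static_cast<uint32_t>(r <= a);
        return r;
    }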
[Removed vendored SIMDe code (continued): the bodies of the saturated-subtraction fallbacks and a HEDLEY_DIAGNOSTIC_POP close simde-math.h. Next is simde-constify.h (MIT license, Copyright 2020 Evan Nemerson). Its header comment explains the purpose of the internal "constify" macros: some SIMD functions require an argument that is an Integer Constant Expression, so these macros wrap the call in a do/while switch with one case per admissible immediate value; as long as the value passed is a compile-time constant the compiler folds the switch away and emits the same code as a literal ICE, while a non-constant value still works at the cost of the switch. The file defines SIMDE_CONSTIFY_2_, _4_, _8_, _16_, _32_ and _64_ (each assigning func_name(__VA_ARGS__, imm) to result, with default_case on fall-through) plus the matching _NO_RESULT_ variants that discard the return value, then closes its include guard. It is followed by the start of simde-align.h (CC0, also by Evan Nemerson), whose opening comment gives an API overview: macros ending in _TO take a numeric alignment, macros ending in _LIKE take a type, the ASSUME macros are optimization hints only, and MSVC is called out as the main source of limitations.]
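The constify trick is general; a minimal C++ sketch of the same dispatch idea (all names below are illustrative):

    // A call that requires a compile-time immediate (here a template parameter
    // standing in for an intrinsic that needs an integer constant expression).
    template <int Imm>
    int shift_by(int x) { return x << Imm; }

    // Driven by a runtime value through a switch with one case per admissible
    // immediate, which is exactly the shape of the removed SIMDE_CONSTIFY_4_ macro.
    int shift_by_runtime(int x, int imm) {
        switch (imm) {
            case 0: return shift_by<0>(x);
            case 1: return shift_by<1>(x);
            case 2: return shift_by<2>(x);
            case 3: return shift_by<3>(x);
            default: return x;  // the macro's default_case
        }
    }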
[Removed vendored SIMDe code (continued): the body of simde-align.h. It defines fallback type macros SIMDE_ALIGN_SIZE_T_ and SIMDE_ALIGN_INTPTR_T_ (preferring the compiler's __SIZE_TYPE__ / __INTPTR_TYPE__ / __PTRDIFF_TYPE__ and otherwise including the standard headers that supply size_t and ptrdiff_t); SIMDE_ALIGN_OF, mapped to _Alignof, alignof, __alignof__, __ALIGNOF__ or MSVC's __alignof depending on the compiler; SIMDE_ALIGN_MAXIMUM / SIMDE_ALIGN_PLATFORM_MAXIMUM, which cap alignment requests on pre-2017 MSVC (64, 32 or 16 bytes depending on version and architecture, 8 on ARM) and on IBM XL (16); the internal SIMDE_ALIGN_64_/32_/16_/8_ constants derived from that cap; SIMDE_ALIGN_CAP(Alignment), which clamps a request to the platform maximum; and SIMDE_ALIGN_TO(Alignment), mapped to __attribute__((__aligned__)), _Alignas, alignas or __declspec(align) as available, together with the fixed SIMDE_ALIGN_TO_8/16/32/64 variants recommended on MSVC because __declspec(align) only accepts a numeric value (so SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(T)) cannot work there). The deletion continues with the documentation of SIMDE_ALIGN_ASSUME_TO, a std::assume_aligned / __builtin_assume_aligned style hint that turns into a runtime check when SIMDE_ALIGN_DEBUG is defined.]
We don't - * integrate with NDEBUG in this header, but it may be a good idea to - * put something like this in your code: - * - * #if !defined(NDEBUG) - * #define SIMDE_ALIGN_DEBUG - * #endif - * #include <.../simde-align.h> - */ -#if \ - HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ - HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ - __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ - __assume_aligned(simde_assume_aligned_t_, Alignment); \ - simde_assume_aligned_t_; \ - })) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) -#else - #if defined(__cplusplus) - template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) - #else - HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) - #endif - { - HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); - return ptr; - } - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) - #else - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) - #endif -#endif - -#if !defined(SIMDE_ALIGN_DEBUG) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) -#else - #include - #if defined(__cplusplus) - template - static HEDLEY_ALWAYS_INLINE - T* - simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #else - static HEDLEY_ALWAYS_INLINE - void* - simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #endif - { - if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { - fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", - file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), - HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), - HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); - } - - return ptr; - } - - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) - #else - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) - #endif -#endif - -/* SIMDE_ALIGN_LIKE(Type) - * SIMDE_ALIGN_LIKE_#(Type) - * - * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros - * except instead of an integer they take a type; basically, it's just - * a more convenient way to do something like: - * - * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - * - * The versions with a numeric suffix will fall back 
on using a numeric - * value in the event we can't use SIMDE_ALIGN_OF(Type). This is - * mainly for MSVC, where __declspec(align()) can't handle anything - * other than hard-coded numeric values. - */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) - #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) -#else - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 -#endif - -/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) - * - * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a - * type instead of a numeric value. */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) - #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) -#endif - -/* SIMDE_ALIGN_CAST(Type, Pointer) - * - * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try - * to silence warnings that some compilers may produce if you try - * to assign to a type with increased alignment requirements. - * - * Note that it does *not* actually attempt to tell the compiler that - * the pointer is aligned like the destination should be; that's the - * job of the next macro. This macro is necessary for stupid APIs - * like _mm_loadu_si128 where the input is a __m128i* but the function - * is specifically for data which isn't necessarily aligned to - * _Alignof(__m128i). - */ -#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ - Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_r_; \ - })) -#else - #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) -#endif - -/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) - * - * This is sort of like a combination of a reinterpret_cast and a - * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell - * the compiler that the pointer is aligned like the specified type - * and casts the pointer to the specified type while suppressing any - * warnings from the compiler about casting to a type with greater - * alignment requirements. - */ -#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) - -#endif /* !defined(SIMDE_ALIGN_H) */ -/* :: End simde/simde-align.h :: */ - -/* In some situations, SIMDe has to make large performance sacrifices - * for small increases in how faithfully it reproduces an API, but - * only a relatively small number of users will actually need the API - * to be completely accurate. The SIMDE_FAST_* options can be used to - * disable these trade-offs. - * - * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or - * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to - * enable some optimizations. Using -ffast-math and/or - * -ffinite-math-only will also enable the relevant options. If you - * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) - #define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) - #if defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_NANS - #elif defined(__FINITE_MATH_ONLY__) - #if __FINITE_MATH_ONLY__ - #define SIMDE_FAST_NANS - #endif - #endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_CONVERSION_RANGE -#endif - -/* Due to differences across platforms, sometimes it can be much - * faster for us to allow spurious floating point exceptions, - * or to no generate them when we should. 
*/ -#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_EXCEPTIONS -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) - #if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ - (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) - #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") - #else - #define SIMDE_REQUIRE_CONSTANT(arg) - #endif -#else - #define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) - /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which - * starts with a double-underscore. This is a system header so we have no - * control over it, but since it's a macro it will emit a diagnostic which - * prevents compilation with -Werror. */ - #if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ - _Static_assert(expr, message); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) - #endif -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) - #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#endif - -/* Statement exprs */ -#if \ - HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ - HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) -#endif - -/* This is just a convenience macro to make it easy to call a single - * function with a specific diagnostic disabled. 
*/ -#if defined(SIMDE_STATEMENT_EXPR_) - #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ - SIMDE_STATEMENT_EXPR_(({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - diagnostic \ - (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#endif - -#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) - #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") -#endif - -#if \ - (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) -# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -# define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -# if \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SCALAR -# define SIMDE_VECTOR_SUBSCRIPT -# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -# define SIMDE_VECTOR_SUBSCRIPT -# elif \ - HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# elif HEDLEY_HAS_ATTRIBUTE(vector_size) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SUBSCRIPT -# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) -# define SIMDE_VECTOR_SCALAR -# endif -# endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) - HEDLEY_DIAGNOSTIC_PUSH - /* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -# endif -# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif - -# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#else -# define SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_SAFELEN(l) -# define SIMDE_VECTORIZE_REDUCTION(r) -# define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if defined(SIMDE_NO_INLINE) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#elif defined(SIMDE_CONSTRAINED_COMPILATION) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static -#else -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if \ - HEDLEY_HAS_ATTRIBUTE(unused) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ - -#if defined(_MSC_VER) -# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -# define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -# define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -# define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -# define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# elif defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__s390x__) || defined(__zarch__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. 
*/ -# elif defined(_WIN32) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(sun) || defined(__sun) /* Solaris */ -# include -# if defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__APPLE__) -# include -# if defined(__LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -# include -# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -# include -# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# endif -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) - #define simde_bswap64(v) _byteswap_uint64(v) -#else - SIMDE_FUNCTION_ATTRIBUTES - uint64_t - simde_bswap64(uint64_t v) { - return - ((v & (((uint64_t) 0xff) << 56)) >> 56) | - ((v & (((uint64_t) 0xff) << 48)) >> 40) | - ((v & (((uint64_t) 0xff) << 40)) >> 24) | - ((v & (((uint64_t) 0xff) << 32)) >> 8) | - ((v & (((uint64_t) 0xff) << 24)) << 8) | - ((v & (((uint64_t) 0xff) << 16)) << 24) | - ((v & (((uint64_t) 0xff) << 8)) << 40) | - ((v & (((uint64_t) 0xff) )) << 56); - } -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -# error Unknown byte order; please file a bug -#else -# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -# define simde_endian_bswap64_be(value) simde_bswap64(value) -# define simde_endian_bswap64_le(value) (value) -# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -# define simde_endian_bswap64_be(value) (value) -# define simde_endian_bswap64_le(value) simde_bswap64(value) -# endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. 
*/ - -#if !defined(SIMDE_FLOAT32_TYPE) -# define SIMDE_FLOAT32_TYPE float -# define SIMDE_FLOAT32_C(value) value##f -#else -# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -# define SIMDE_FLOAT64_TYPE double -# define SIMDE_FLOAT64_C(value) value -#else -# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if defined(SIMDE_POLY8_TYPE) -# undef SIMDE_POLY8_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY8_TYPE poly8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value)) -#else -# define SIMDE_POLY8_TYPE uint8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value)) -#endif -typedef SIMDE_POLY8_TYPE simde_poly8; - -#if defined(SIMDE_POLY16_TYPE) -# undef SIMDE_POLY16_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY16_TYPE poly16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value)) -#else -# define SIMDE_POLY16_TYPE uint16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value)) -#endif -typedef SIMDE_POLY16_TYPE simde_poly16; - -#if defined(SIMDE_POLY64_TYPE) -# undef SIMDE_POLY64_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_POLY64_TYPE poly64_t -# define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull)) -#else -# define SIMDE_POLY64_TYPE uint64_t -# define SIMDE_POLY64_C(value) value ## ull -#endif -typedef SIMDE_POLY64_TYPE simde_poly64; - -#if defined(SIMDE_POLY128_TYPE) -# undef SIMDE_POLY128_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) -# define SIMDE_POLY128_TYPE poly128_t -# define SIMDE_POLY128_C(value) value -#elif defined(__SIZEOF_INT128__) -# define SIMDE_POLY128_TYPE __int128 -# define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value)) -#else -# define SIMDE_POLY128_TYPE uint64_t -# define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1 -#endif -typedef SIMDE_POLY128_TYPE simde_poly128; - -#if defined(__cplusplus) - typedef bool simde_bool; -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - typedef _Bool simde_bool; -#elif defined(bool) - typedef bool simde_bool; -#else - #include - typedef bool simde_bool; -#endif - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -# define SIMDE_CONVERT_FTOI(T,v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) \ - HEDLEY_DIAGNOSTIC_POP -#else -# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) -#else - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -# define 
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -# define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -# define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -# if \ - defined(HEDLEY_PGI_VERSION) || \ - defined(HEDLEY_MSVC_VERSION) -# define SIMDE_STDC_HOSTED 1 -# else -# define SIMDE_STDC_HOSTED 0 -# endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) - #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) - #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) - #endif -#endif -#if !defined(simde_memset) - #if HEDLEY_HAS_BUILTIN(__builtin_memset) - #define simde_memset(s, c, n) __builtin_memset(s, c, n) - #endif -#endif -#if !defined(simde_memcmp) - #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) - #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) - #endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) - #if !defined(SIMDE_NO_STRING_H) - #if defined(__has_include) - #if !__has_include() - #define SIMDE_NO_STRING_H - #endif - #elif (SIMDE_STDC_HOSTED == 0) - #define SIMDE_NO_STRING_H - #endif - #endif - - #if !defined(SIMDE_NO_STRING_H) - #include - #if !defined(simde_memcpy) - #define simde_memcpy(dest, src, n) memcpy(dest, src, n) - #endif - #if !defined(simde_memset) - #define simde_memset(s, c, n) memset(s, c, n) - #endif - #if !defined(simde_memcmp) - #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) - #endif - #else - /* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. 
*/ - #if !defined(simde_memcpy) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memcpy_(void* dest, const void* src, size_t len) { - char* dest_ = HEDLEY_STATIC_CAST(char*, dest); - char* src_ = HEDLEY_STATIC_CAST(const char*, src); - for (size_t i = 0 ; i < len ; i++) { - dest_[i] = src_[i]; - } - } - #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) - #endif - - #if !defined(simde_memset) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memset_(void* s, int c, size_t len) { - char* s_ = HEDLEY_STATIC_CAST(char*, s); - char c_ = HEDLEY_STATIC_CAST(char, c); - for (size_t i = 0 ; i < len ; i++) { - s_[i] = c_[i]; - } - } - #define simde_memset(s, c, n) simde_memset_(s, c, n) - #endif - - #if !defined(simde_memcmp) - SIMDE_FUCTION_ATTRIBUTES - int - simde_memcmp_(const void *s1, const void *s2, size_t n) { - unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); - unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); - for (size_t i = 0 ; i < len ; i++) { - if (s1_[i] != s2_[i]) { - return (int) (s1_[i] - s2_[i]); - } - } - return 0; - } - #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) - #endif - #endif -#endif - -/*** Functions that quiet a signaling NaN ***/ - -static HEDLEY_INLINE -double -simde_math_quiet(double x) { - uint64_t tmp, mask; - if (!simde_math_isnan(x)) { - return x; - } - simde_memcpy(&tmp, &x, 8); - mask = 0x7ff80000; - mask <<= 32; - tmp |= mask; - simde_memcpy(&x, &tmp, 8); - return x; -} - -static HEDLEY_INLINE -float -simde_math_quietf(float x) { - uint32_t tmp; - if (!simde_math_isnanf(x)) { - return x; - } - simde_memcpy(&tmp, &x, 4); - tmp |= 0x7fc00000lu; - simde_memcpy(&x, &tmp, 4); - return x; -} - -#if defined(FE_ALL_EXCEPT) - #define SIMDE_HAVE_FENV_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_FENV_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_FENV_H -#endif - -#if defined(EXIT_FAILURE) - #define SIMDE_HAVE_STDLIB_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_STDLIB_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_STDLIB_H -#endif - -#if defined(__has_include) -# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -# include -# elif __has_include() -# include -# endif -# if __has_include() -# include -# endif -#elif SIMDE_STDC_HOSTED == 1 -# include -# include -#endif - -#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ - static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ - T_To \ - Name (T_From value) { \ - T_To r; \ - simde_memcpy(&r, &value, sizeof(r)); \ - return r; \ - } - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/check.h :: */ -/* Check (assertions) - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -#if !defined(_WIN32) -# define SIMDE_SIZE_MODIFIER "z" -# define SIMDE_CHAR_MODIFIER "hh" -# define SIMDE_SHORT_MODIFIER "h" -#else -# if defined(_M_X64) || defined(__amd64__) -# define SIMDE_SIZE_MODIFIER "I64" -# else -# define SIMDE_SIZE_MODIFIER "" -# endif -# define SIMDE_CHAR_MODIFIER "" -# define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) -# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ -# define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -# if defined(__has_include) -# if __has_include() -# include -# endif -# elif defined(SIMDE_STDC_HOSTED) -# if SIMDE_STDC_HOSTED == 1 -# include -# endif -# elif defined(__STDC_HOSTED__) -# if __STDC_HOSTETD__ == 1 -# include -# endif -# endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/debug-trap.h :: */ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define simde_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define simde_trap() __debugbreak() -# endif -#endif -#if !defined(simde_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define simde_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define simde_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define simde_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void simde_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define simde_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define simde_trap() raise(SIGTRAP) -# else -# define simde_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -# define simde_dbg_assert(expr) do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -# define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ -/* :: End simde/debug-trap.h :: */ - - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -# if defined(EOF) -# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) -# else -# define simde_errorf(format, ...) (simde_trap()) -# endif - HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -# if defined(SIMDE_CHECK_FAIL_DEFINED) -# define simde_assert(expr) -# else -# if defined(HEDLEY_ASSUME) -# define simde_assert(expr) HEDLEY_ASSUME(expr) -# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) -# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) -# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) -# define simde_assert(expr) __assume(expr) -# else -# define simde_assert(expr) -# endif -# endif -# define simde_assert_true(expr) simde_assert(expr) -# define simde_assert_false(expr) simde_assert(!(expr)) -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) -# define simde_assert_double_equal(a, b, precision) -# define simde_assert_string_equal(a, b) -# define simde_assert_string_not_equal(a, b) -# define simde_assert_memory_equal(size, a, b) -# define simde_assert_memory_not_equal(size, a, b) -#else -# define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ - -(simde_tmp_a_ - simde_tmp_b_) : \ - (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# include -# define simde_assert_string_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ - simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_string_not_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ - simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ - simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) \ - simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) \ - simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) \ - simde_assert_type(float, "f", a, op, b) 
-#define simde_assert_double(a, op, b) \ - simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void*, "p", a, op, b) - -#define simde_assert_int8(a, op, b) \ - simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) \ - simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) \ - simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) \ - simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ -/* :: End simde/check.h :: */ - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether the operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * we use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long lonsg are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long. 
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. 
- * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - #include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; - #endif - - #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; - #endif - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; - #endif - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; - #endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde__m64_from_private(simde__m64_private v) { - simde__m64 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64_private -simde__m64_to_private(simde__m64 v) { - simde__m64_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ - SIMDE_FUNCTION_ATTRIBUTES \ - simde__##simde_type \ - simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ - simde__##simde_type##_private r_; \ - r_.isax##_##fragment = value; \ - return simde__##simde_type##_from_private(r_); \ - } \ - \ - SIMDE_FUNCTION_ATTRIBUTES \ - source_type \ - simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ - simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ - return r_.isax##_##fragment; \ - } - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) -#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) -# define _m_paddb(a, b) simde_m_paddb(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_add_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) -# define _m_paddw(a, b) simde_mm_add_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) -# define _m_paddd(a, b) simde_mm_add_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { - r_.i8[i] = INT8_MAX; - } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { - r_.i8[i] = INT8_MIN; - } else { - r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) -# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, 
b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) -# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_and_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - r_.i64[0] = a_.i64[0] & b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -# define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = 
simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) -# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) -# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) -# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) -# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) -# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) -# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtm64_si64 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtm64_si64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i64[0]; - #endif - #endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -# define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi32_si64 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[2] = { a, 0 }; - r_.neon_i32 = vld1_s32(av); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -# define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi64_m64 (int64_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtsi64_m64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); - #else - r_.i64[0] = a; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi64_si32 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_empty (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); - #else - /* noop */ - #endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_empty() simde_mm_empty() -# define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_or_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - r_.i64[0] = a_.i64[0] | b_.i64[0]; 
- #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -# define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to 
UINT8_MAX */ - const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); - - /* Elements which are within the acceptable range */ - const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); - const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); - - /* Final values as 16-bit integers */ - const int16x8_t values = vorrq_s16(le_max, gt_max); - - r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else if (a_.i16[i] < 0) { - r_.u8[i] = 0; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] > UINT8_MAX) { - r_.u8[i + 4] = UINT8_MAX; - } else if (b_.i16[i] < 0) { - r_.u8[i + 4] = 0; - } else { - r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) -# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i8 = vld1_s8(v); - #else - r_.i8[0] = e0; - r_.i8[1] = e1; - r_.i8[2] = e2; - r_.i8[3] = e3; - r_.i8[4] = e4; - r_.i8[5] = e5; - r_.i8[6] = e6; - r_.i8[7] = e7; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m64_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi8( - HEDLEY_STATIC_CAST(int8_t, e7), - HEDLEY_STATIC_CAST(int8_t, e6), - HEDLEY_STATIC_CAST(int8_t, e5), - HEDLEY_STATIC_CAST(int8_t, e4), - HEDLEY_STATIC_CAST(int8_t, e3), - HEDLEY_STATIC_CAST(int8_t, e2), - HEDLEY_STATIC_CAST(int8_t, e1), - HEDLEY_STATIC_CAST(int8_t, e0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u8 = vld1_u8(v); - #else - r_.u8[0] = e0; - r_.u8[1] = e1; - r_.u8[2] = e2; - r_.u8[3] = e3; - r_.u8[4] = e4; - r_.u8[5] = e5; - r_.u8[6] = e6; - r_.u8[7] = e7; - #endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi16(e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; - r_.neon_i16 = vld1_s16(v); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - #endif - - return 
simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi16( - HEDLEY_STATIC_CAST(int16_t, e3), - HEDLEY_STATIC_CAST(int16_t, e2), - HEDLEY_STATIC_CAST(int16_t, e1), - HEDLEY_STATIC_CAST(int16_t, e0) - ); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; - r_.neon_u16 = vld1_u16(v); -#else - r_.u16[0] = e0; - r_.u16[1] = e1; - r_.u16[2] = e2; - r_.u16[3] = e3; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32( - HEDLEY_STATIC_CAST(int32_t, e1), - HEDLEY_STATIC_CAST(int32_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; - r_.neon_u32 = vld1_u32(v); -#else - r_.u32[0] = e0; - r_.u32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi32 (int32_t e1, int32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32(e1, e0); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; - r_.neon_i32 = vld1_s32(v); -#else - r_.i32[0] = e0; - r_.i32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pi64 (int64_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; - r_.neon_i64 = vld1_s64(v); -#else - r_.i64[0] = e0; -#endif - - return simde__m64_from_private(r_); -} - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; - r_.neon_f32 = vld1_f32(v); -#else - r_.f32[0] = e0; - r_.f32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi8 (int8_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi8(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i8 = vmov_n_s8(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi8(a, a, a, a, a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi16 (int16_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi16(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i16 = vmov_n_s16(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi16(a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi32 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi32(a); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i32 = vmov_n_s32(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi16(e3, e2, e1, e0); - #else - return simde_mm_set_pi16(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi32 (int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi32(e1, e0); - #else - return simde_mm_set_pi32(e0, e1); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setzero_si64 (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setzero_si64(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_u32 = vmov_n_u32(0); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(0, 0); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_si64() simde_mm_setzero_si64() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_load_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_loadu_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(mem_addr, &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_setone_si64 (void) { - return simde_mm_set1_pi32(~INT32_C(0)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) 
- return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) -# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) -# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psllh_s(a_.mmi_i16, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) -# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi32(a, count); - #else - simde__m64_private r_; - 
simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) -# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_slli_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); - #else - r_.u64[0] = a_.u64[0] << count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) -# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 << count_.i64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] << count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) -# define _m_psllq(a, count) simde_mm_sll_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) - return simde_mm_setzero_si64(); - - r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { - r_.u16[i] = a_.u16[i] >> count_.u64[0]; - } - 
#endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) -# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { - r_.u32[i] = a_.u32[i] >> count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) -# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) -# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) -# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_si64(a, count); 
- #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> count; - #else - r_.u64[0] = a_.u64[0] >> count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) -# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 >> count_.u64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] >> count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) -# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrah_s(a_.mmi_i16, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) -# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psraw_s(a_.mmi_i32, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) 
-# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) -# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int32_t cnt = (count_.u64[0] > 31) ? 31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) -# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) -# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); - #elif 
defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) -# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) -# define _m_psubd(a, b) simde_mm_sub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { - r_.i8[i] = INT8_MIN; - } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) -# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const int32_t x = a_.u8[i] - b_.u8[i]; - if (x < 0) { - r_.u8[i] = 0; - } else if (x > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) -# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { - r_.i16[i] = SHRT_MIN; - } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) -# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - const int x = a_.u16[i] - b_.u16[i]; - if (x < 0) { - r_.u16[i] = 0; - } else if (x > UINT16_MAX) { - r_.u16[i] = UINT16_MAX; - } else { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) -# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); - #else - r_.i8[0] = a_.i8[4]; - r_.i8[1] = b_.i8[4]; - r_.i8[2] = a_.i8[5]; - r_.i8[3] = b_.i8[5]; - r_.i8[4] = a_.i8[6]; - r_.i8[5] = b_.i8[6]; - r_.i8[6] = a_.i8[7]; - r_.i8[7] = b_.i8[7]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) -# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); - #else - r_.i16[0] = a_.i16[2]; - r_.i16[1] = b_.i16[2]; - r_.i16[2] = a_.i16[3]; - r_.i16[3] = b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) -# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[1]; - r_.i32[1] = b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) -# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); - #else - r_.i8[0] = a_.i8[0]; - r_.i8[1] = b_.i8[0]; - r_.i8[2] = a_.i8[1]; - r_.i8[3] = b_.i8[1]; - r_.i8[4] = a_.i8[2]; - r_.i8[5] = b_.i8[2]; - r_.i8[6] = a_.i8[3]; - r_.i8[7] = b_.i8[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) -# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = 
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); - #else - r_.i16[0] = a_.i16[0]; - r_.i16[1] = b_.i16[0]; - r_.i16[2] = a_.i16[1]; - r_.i16[3] = b_.i16[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) -# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); - #else - r_.i32[0] = a_.i32[0]; - r_.i32[1] = b_.i32[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) -# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_xor_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - r_.u64[0] = a_.u64[0] ^ b_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) -# define _m_pxor(a, b) simde_mm_xor_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_m_to_int (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _m_to_int(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _m_to_int(a) simde_m_to_int(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_MMX_H) */ -/* :: End simde/x86/mmx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-f16.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do 
so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if !defined(SIMDE_FLOAT16_H) -#define SIMDE_FLOAT16_H - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* Portable version which should work on pretty much any compiler. - * Obviously you can't rely on compiler support for things like - * conversion to/from 32-bit floats, so make sure you always use the - * functions and macros in this file! - * - * The portable implementations are (heavily) based on CC0 code by - * Fabian Giesen: (see also - * ). - * I have basically just modified it to get rid of some UB (lots of - * aliasing, right shifting a negative value), use fixed-width types, - * and work in C. */ -#define SIMDE_FLOAT16_API_PORTABLE 1 -/* _Float16, per C standard (TS 18661-3; - * ). */ -#define SIMDE_FLOAT16_API_FLOAT16 2 -/* clang >= 6.0 supports __fp16 as an interchange format on all - * targets, but only allows you to use them for arguments and return - * values on targets which have defined an ABI. We get around the - * restriction by wrapping the __fp16 in a struct, but we can't do - * that on Arm since it would break compatibility with the NEON F16 - * functions. */ -#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 -/* This is basically __fp16 as specified by Arm, where arugments and - * return values are raw __fp16 values not structs. */ -#define SIMDE_FLOAT16_API_FP16 4 - -/* Choosing an implementation. This is a bit rough, but I don't have - * any ideas on how to improve it. If you do, patches are definitely - * welcome. */ -#if !defined(SIMDE_FLOAT16_API) - #if defined(__ARM_FP16_FORMAT_IEEE) && (defined(SIMDE_ARM_NEON_FP16) || defined(__ARM_FP16_ARGS)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 - #elif !defined(__EMSCRIPTEN__) && !(defined(__clang__) && defined(SIMDE_ARCH_POWER)) && \ - !(defined(HEDLEY_MSVC_VERSION) && defined(__clang__)) && \ - !(defined(SIMDE_ARCH_MIPS) && defined(__clang__)) && \ - !(defined(__clang__) && defined(SIMDE_ARCH_RISCV64)) && ( \ - defined(SIMDE_X86_AVX512FP16_NATIVE) || \ - (defined(SIMDE_ARCH_X86_SSE2) && HEDLEY_GCC_VERSION_CHECK(12,0,0)) || \ - (defined(SIMDE_ARCH_AARCH64) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !defined(__cplusplus)) || \ - ((defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0)) || \ - (!(defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(6,0,0))) - /* We haven't found a better way to detect this. 
It seems like defining - * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then - * checking for defined(FLT16_MAX) should work, but both gcc and - * clang will define the constants even if _Float16 is not - * supported. Ideas welcome. */ - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 - #elif defined(__FLT16_MIN__) && \ - (defined(__clang__) && \ - (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) \ - && !defined(SIMDE_ARCH_RISCV64)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI - #else - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE - #endif -#endif - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 - typedef _Float16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #if !defined(__cplusplus) - #define SIMDE_FLOAT16_C(value) value##f16 - #else - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(_Float16, (value)) - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - typedef struct { __fp16 value; } simde_float16; - #if defined(SIMDE_STATEMENT_EXPR_) && !defined(SIMDE_TESTS_H) - #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) - #else - #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) - #define SIMDE_FLOAT16_IS_SCALAR 1 - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 - typedef __fp16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - typedef struct { uint16_t value; } simde_float16; -#else - #error No 16-bit floating point API. -#endif - -#if \ - defined(SIMDE_VECTOR_OPS) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) - #define SIMDE_FLOAT16_VECTOR -#endif - -/* Reinterpret -- you *generally* shouldn't need these, they're really - * intended for internal use. However, on x86 half-precision floats - * get stuffed into a __m128i/__m256i, so it may be useful. 
*/ - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - #define SIMDE_NANHF simde_uint16_as_float16(0x7E00) // a quiet Not-a-Number - #define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) - #define SIMDE_NINFINITYHF simde_uint16_as_float16(0xFC00) -#else - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF SIMDE_FLOAT16_C(__builtin_nanf16("")) - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_FLOAT16_C(SIMDE_MATH_NAN) - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(__builtin_inf16()) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-__builtin_inf16()) - #else - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-SIMDE_MATH_INFINITY) - #endif - #else - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF __builtin_nanf16("") - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_MATH_NAN - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF __builtin_inf16() - #define SIMDE_NINFINITYHF -(__builtin_inf16()) - #else - #define SIMDE_INFINITYHF HEDLEY_STATIC_CAST(simde_float16, SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF HEDLEY_STATIC_CAST(simde_float16, -SIMDE_MATH_INFINITY) - #endif - #endif -#endif - -/* Conversion -- convert between single-precision and half-precision - * floats. */ -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float16 -simde_float16_from_float32 (simde_float32 value) { - simde_float16 res; - - #if \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) - res = HEDLEY_STATIC_CAST(simde_float16, value); - #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) - res.value = HEDLEY_STATIC_CAST(__fp16, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint32_t f32u = simde_float32_as_uint32(value); - static const uint32_t f32u_infty = UINT32_C(255) << 23; - static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; - static const uint32_t denorm_magic = - ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; - uint16_t f16u; - - uint32_t sign = f32u & (UINT32_C(1) << 31); - f32u ^= sign; - - /* NOTE all the integer compares in this function cast the operands - * to signed values to help compilers vectorize to SSE2, which lacks - * unsigned comparison instructions. This is fine since all - * operands are below 0x80000000 (we clear the sign bit). */ - - if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ - f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ - } else { /* (De)normalized number or zero */ - if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ - /* use a magic value to align our 10 mantissa bits at the bottom of - * the float. as long as FP addition is round-to-nearest-even this - * just works. */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); - - /* and one integer subtract of the bias later, we have our final float! 
*/ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); - } else { - uint32_t mant_odd = (f32u >> 13) & 1; - - /* update exponent, rounding bias part 1 */ - f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); - /* rounding bias part 2 */ - f32u += mant_odd; - /* take the bits! */ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); - } - } - - f16u |= sign >> 16; - res = simde_uint16_as_float16(f16u); - #endif - - return res; -} - -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float32 -simde_float16_to_float32 (simde_float16 value) { - simde_float32 res; - - #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) - res = HEDLEY_STATIC_CAST(simde_float32, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint16_t half = simde_float16_as_uint16(value); - const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); - const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ - uint32_t f32u; - - f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ - uint32_t exp = shifted_exp & f32u; /* just the exponent */ - f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ - - /* handle exponent special cases */ - if (exp == shifted_exp) /* Inf/NaN? */ - f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ - else if (exp == 0) { /* Zero/Denormal? */ - f32u += (1) << 23; /* extra exp adjust */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ - } - - f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ - res = simde_uint32_as_float32(f32u); - #endif - - return res; -} - -#ifdef SIMDE_FLOAT16_C - #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) -#else - #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) -#endif - -#if !defined(simde_isinfhf) && defined(simde_math_isinff) - #define simde_isinfhf(a) simde_math_isinff(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnanhf) && defined(simde_math_isnanf) - #define simde_isnanhf(a) simde_math_isnanf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnormalhf) && defined(simde_math_isnormalf) - #define simde_isnormalhf(a) simde_math_isnormalf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_issubnormalhf) && defined(simde_math_issubnormalf) - #define simde_issubnormalhf(a) simde_math_issubnormalf(simde_float16_to_float32(a)) -#endif - -#define simde_fpclassifyhf(a) simde_math_fpclassifyf(simde_float16_to_float32(a)) - -static HEDLEY_INLINE -uint8_t -simde_fpclasshf(simde_float16 v, const int imm8) { - uint16_t bits = simde_float16_as_uint16(v); - uint8_t negative = (bits >> 15) & 1; - uint16_t const ExpMask = 0x7C00; // [14:10] - uint16_t const MantMask = 0x03FF; // [9:0] - uint8_t exponent_all_ones = ((bits & ExpMask) == ExpMask); - uint8_t exponent_all_zeros = ((bits & ExpMask) == 0); - uint8_t mantissa_all_zeros = ((bits & MantMask) == 0); - uint8_t zero = exponent_all_zeros & mantissa_all_zeros; - uint8_t signaling_bit = (bits >> 9) & 1; - - uint8_t result = 0; - uint8_t snan = exponent_all_ones & (!mantissa_all_zeros) & (!signaling_bit); - uint8_t qnan = exponent_all_ones & (!mantissa_all_zeros) & signaling_bit; - uint8_t positive_zero = (!negative) & zero; - uint8_t negative_zero = negative & zero; - uint8_t positive_infinity = (!negative) & exponent_all_ones & mantissa_all_zeros; - uint8_t negative_infinity = negative & exponent_all_ones & mantissa_all_zeros; - uint8_t 
denormal = exponent_all_zeros & (!mantissa_all_zeros); - uint8_t finite_negative = negative & (!exponent_all_ones) & (!zero); - result = (((imm8 >> 0) & qnan) | \ - ((imm8 >> 1) & positive_zero) | \ - ((imm8 >> 2) & negative_zero) | \ - ((imm8 >> 3) & positive_infinity) | \ - ((imm8 >> 4) & negative_infinity) | \ - ((imm8 >> 5) & denormal) | \ - ((imm8 >> 6) & finite_negative) | \ - ((imm8 >> 7) & snan)); - return result; -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_FLOAT16_H) */ -/* :: End simde/simde-f16.h :: */ - -#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) - #define NOMINMAX - #include -#endif - -#if defined(__ARM_ACLE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_ALIGN_TO_16 __m128 n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v16i8 lsx_i8; - v8i16 lsx_i16; - v4i32 lsx_i32; - v2i64 lsx_i64; - v16u8 lsx_u8; - v8u16 lsx_u16; - v4u32 lsx_u32; - v2u64 lsx_u64; - v4f32 lsx_f32; - v2f64 lsx_f64; - #endif -} simde__m128_private; - -#if defined(SIMDE_X86_SSE_NATIVE) - typedef __m128 simde__m128; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef float32x4_t simde__m128; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; -#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - typedef v4f32 simde__m128; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128_private simde__m128; -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - typedef simde__m128 __m128; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde__m128_from_private(simde__m128_private v) { - simde__m128 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128_private -simde__m128_to_private(simde__m128 v) { - simde__m128_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(float) - simde__m128_to_altivec_f32(simde__m128 value) { - simde__m128_private r_ = simde__m128_to_private(value); - return r_.altivec_f32; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { - simde__m128_private r_; - r_.altivec_f32 = value; - return simde__m128_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); -#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ - -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) -#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ - -enum { - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, - SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, - SIMDE_MM_ROUND_UP = _MM_ROUND_UP, - SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO - #else - SIMDE_MM_ROUND_NEAREST = 0x0000, - SIMDE_MM_ROUND_DOWN = 0x2000, - SIMDE_MM_ROUND_UP = 0x4000, - SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 - #endif -}; -#if defined(_MM_ROUND_MASK) -# define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK -#else -# define SIMDE_MM_ROUND_MASK (0x6000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK -#endif - -#if defined(_MM_FROUND_TO_NEAREST_INT) -# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT -# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF -# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF -# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO -# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION - -# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC -# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC -#else -# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 -# define 
SIMDE_MM_FROUND_TO_NEG_INF 0x01 -# define SIMDE_MM_FROUND_TO_POS_INF 0x02 -# define SIMDE_MM_FROUND_TO_ZERO 0x03 -# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 - -# define SIMDE_MM_FROUND_RAISE_EXC 0x00 -# define SIMDE_MM_FROUND_NO_EXC 0x08 -#endif - -#define SIMDE_MM_FROUND_NINT \ - (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_FLOOR \ - (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_CEIL \ - (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_TRUNC \ - (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_RINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_NEARBYINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) - -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) -# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT -# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF -# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF -# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO -# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION -# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC -# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT -# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR -# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL -# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC -# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT -# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT -#endif - -#if defined(_MM_EXCEPT_INVALID) -# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID -#else -# define SIMDE_MM_EXCEPT_INVALID (0x0001) -#endif -#if defined(_MM_EXCEPT_DENORM) -# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM -#else -# define SIMDE_MM_EXCEPT_DENORM (0x0002) -#endif -#if defined(_MM_EXCEPT_DIV_ZERO) -# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO -#else -# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) -#endif -#if defined(_MM_EXCEPT_OVERFLOW) -# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW -#else -# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) -#endif -#if defined(_MM_EXCEPT_UNDERFLOW) -# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW -#else -# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) -#endif -#if defined(_MM_EXCEPT_INEXACT) -# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT -#else -# define SIMDE_MM_EXCEPT_INEXACT (0x0020) -#endif -#if defined(_MM_EXCEPT_MASK) -# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK -#else -# define SIMDE_MM_EXCEPT_MASK \ - (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ - SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ - SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID - #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM - #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO - #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW - #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW - #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT - #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK -#endif - -#if defined(_MM_MASK_INVALID) -# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID -#else -# define SIMDE_MM_MASK_INVALID (0x0080) -#endif -#if defined(_MM_MASK_DENORM) -# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM -#else -# define SIMDE_MM_MASK_DENORM (0x0100) -#endif -#if defined(_MM_MASK_DIV_ZERO) -# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO -#else -# define 
SIMDE_MM_MASK_DIV_ZERO (0x0200) -#endif -#if defined(_MM_MASK_OVERFLOW) -# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW -#else -# define SIMDE_MM_MASK_OVERFLOW (0x0400) -#endif -#if defined(_MM_MASK_UNDERFLOW) -# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW -#else -# define SIMDE_MM_MASK_UNDERFLOW (0x0800) -#endif -#if defined(_MM_MASK_INEXACT) -# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT -#else -# define SIMDE_MM_MASK_INEXACT (0x1000) -#endif -#if defined(_MM_MASK_MASK) -# define SIMDE_MM_MASK_MASK _MM_MASK_MASK -#else -# define SIMDE_MM_MASK_MASK \ - (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ - SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ - SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID - #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM - #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO - #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW - #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW - #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT - #define _MM_MASK_MASK SIMDE_MM_MASK_MASK -#endif - -#if defined(_MM_FLUSH_ZERO_MASK) -# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK -#else -# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_ON) -# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON -#else -# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_OFF) -# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF -#else -# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK - #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON - #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_ROUNDING_MODE(void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _MM_GET_ROUNDING_MODE(); - #elif defined(SIMDE_HAVE_FENV_H) - unsigned int vfe_mode; - - switch (fegetround()) { - #if defined(FE_TONEAREST) - case FE_TONEAREST: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case FE_TOWARDZERO: - vfe_mode = SIMDE_MM_ROUND_DOWN; - break; - #endif - - #if defined(FE_UPWARD) - case FE_UPWARD: - vfe_mode = SIMDE_MM_ROUND_UP; - break; - #endif - - #if defined(FE_DOWNWARD) - case FE_DOWNWARD: - vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; - break; - #endif - - default: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - } - - return vfe_mode; - #else - return SIMDE_MM_ROUND_NEAREST; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_ROUNDING_MODE(a); - #elif defined(SIMDE_HAVE_FENV_H) - int fe_mode = FE_TONEAREST; - - switch (a) { - #if defined(FE_TONEAREST) - case SIMDE_MM_ROUND_NEAREST: - fe_mode = FE_TONEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case SIMDE_MM_ROUND_TOWARD_ZERO: - fe_mode = FE_TOWARDZERO; - break; - #endif - - #if defined(FE_DOWNWARD) - case SIMDE_MM_ROUND_DOWN: - fe_mode = FE_DOWNWARD; - break; - #endif - - #if defined(FE_UPWARD) - case SIMDE_MM_ROUND_UP: - fe_mode = FE_UPWARD; - break; - #endif - - default: - return; - } - - fesetround(fe_mode); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. 
[This portion of the diff deletes, in full, a vendored SIMDE SSE portability header. The removed lines are exclusively `simde_mm_*` wrappers for `__m128`/`__m64` intrinsics — rounding and lane helpers (`_mm_round_ps`, `_mm_set_ps`, `_mm_set1_ps`, `_mm_move_ss`), arithmetic and bitwise ops (`_mm_add_ps`/`_ss`, `_mm_and_ps`, `_mm_andnot_ps`, `_mm_or_ps`, `_mm_xor_ps`, `_mm_div_ps`/`_ss`), the `_mm_cmp*` and `_mm_comi*` comparison families, packed averages (`_mm_avg_pu8`/`_pu16`), and the `_mm_cvt*` conversion helpers — each implemented with the native SSE intrinsic when available and with NEON, AltiVec, WASM-SIMD, LoongArch-LSX, or scalar-loop fallbacks otherwise. No project-specific code is touched in this hunk; the third-party header is removed wholesale.]
defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = a_.f32[0] / b_.f32[0]; - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_mm_extract_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private a_ = simde__m64_to_private(a); - return a_.i16[imm8]; -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) -#endif -#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) -# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private - a_ = simde__m64_to_private(a); - - a_.i16[imm8] = i; - - return simde__m64_from_private(a_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) -#endif -#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps(mem_addr); -#else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); - #endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load1_ps (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps1(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_dup_f32(mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); - #else - r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ss (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ss(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); - #else - r_.f32[0] = *mem_addr; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); - #else - simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -/* The SSE documentation says that there are no alignment requirements - for mem_addr. Unfortunately they used the __m64 type for the argument - which is supposed to be 8-byte aligned, so some compilers (like clang - with -Wcast-align) will generate a warning if you try to cast, say, - a simde_float32* to a simde__m64* for this function. - - I think the choice of argument type is unfortunate, but I do think we - need to stick to it here. 
If there is demand I can always add something - like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vld1_f32( - HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); - #else - simde__m64_private b_; - simde_memcpy(&b_, mem_addr, sizeof(b_)); - r_.i32[0] = b_.i32[0]; - r_.i32[1] = b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadr_ps(mem_addr); - #else - simde__m128_private - r_, - v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrev64q_f32(v_.neon_f32); - r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_reve(v_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); - #else - r_.f32[0] = v_.f32[3]; - r_.f32[1] = v_.f32[2]; - r_.f32[2] = v_.f32[1]; - r_.f32[3] = v_.f32[0]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadu_ps(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m64_private - a_ = simde__m64_to_private(a), - mask_ = simde__m64_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) - if (mask_.i8[i] < 0) - mem_addr[i] = a_.i8[i]; - #endif -} -#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) -# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) - r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); - #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) - r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) -# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) -# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - #if defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); - #else - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); - #endif - #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); - r_.f32 = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.f32), - ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | - (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) - ) - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) -# define _m_pminub(a, b) simde_mm_min_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movehl_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vzip2q_u64(b_.neon_u64, a_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a32 = vget_high_f32(a_.neon_f32); - float32x2_t b32 = vget_high_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(b32, a32); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergel(b_.altivec_i64, a_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); - #else - r_.f32[0] = b_.f32[2]; - r_.f32[1] = b_.f32[3]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movelh_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = 
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] == b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] == b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] >= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] >= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] > b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] > b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] <= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] <= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] < b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] < b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] != b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] != b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) -# if defined(__has_builtin) -# if __has_builtin(__builtin_ia32_undef128) -# define SIMDE_HAVE_UNDEFINED128 -# endif -# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) -# define SIMDE_HAVE_UNDEFINED128 -# endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpackhi_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_high_f32(a_.neon_f32); - float32x2_t b1 = vget_high_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = 
__lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpacklo_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_low_f32(a_.neon_f32); - float32x2_t b1 = vget_low_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) || \ - defined(SIMDE_VECTOR_SUBSCRIPT)) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private a_ = simde__m64_to_private(a); - vst1_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), a_.neon_i64); - #else - simde__m64_private* - dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), - a_ = simde__m64_to_private(a); - - dest->i64[0] = a_.i64[0]; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || defined(SIMDE_LOONGARCH_LSX_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_ASSUME_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_ps(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_ps(mem_addr, a) 
simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ - row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - } while (0) -#else - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ - SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ - row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ - row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ - row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ - row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ - } while (0) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE_H) */ -/* :: End simde/x86/sse.h :: */ -#if !defined(SIMDE_X86_AVX_H) -#define SIMDE_X86_AVX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_SSE4_2_H) -#define SIMDE_X86_SSE4_2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#if !defined(SIMDE_X86_SSE4_1_H) -#define SIMDE_X86_SSE4_1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/ssse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSSE3_H) -#define SIMDE_X86_SSSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSE3_H) -#define SIMDE_X86_SSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. 
Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - * 2017 Hasindu Gamaarachchi - * 2018 Jeff Daily - */ - -#if !defined(SIMDE_X86_SSE2_H) -#define SIMDE_X86_SSE2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128i n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - #if defined(__ARM_FP16_FORMAT_IEEE) - SIMDE_ALIGN_TO_16 float16x8_t neon_f16; - #endif - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - 
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128i_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128d n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 
msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128d_private; - -#if defined(SIMDE_X86_SSE2_NATIVE) - typedef __m128i simde__m128i; - typedef __m128d simde__m128d; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int64x2_t simde__m128i; -# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - typedef float64x2_t simde__m128d; -# elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -# else - typedef simde__m128d_private simde__m128d; -# endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128i; - typedef v128_t simde__m128d; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; - #else - typedef simde__m128d_private simde__m128d; - #endif -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128i_private simde__m128i; - typedef simde__m128d_private simde__m128d; -#endif - -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - typedef simde__m128i __m128i; - typedef simde__m128d __m128d; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde__m128i_from_private(simde__m128i_private v) { - simde__m128i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i_private -simde__m128i_to_private(simde__m128i v) { - simde__m128i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde__m128d_from_private(simde__m128d_private v) { - simde__m128d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d_private -simde__m128d_to_private(simde__m128d v) { - simde__m128d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, 
f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(double) - simde__m128d_to_altivec_f64(simde__m128d value) { - simde__m128d_private r_ = simde__m128d_to_private(value); - return r_.altivec_f64; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { - simde__m128d_private r_; - r_.altivec_f64 = value; - return simde__m128d_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) - #endif - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_pd(e1, e0); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make(e0, e1); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; - r_.neon_f64 = vld1q_f64(data); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_pd(a); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_pd(a) simde_mm_set1_pd(a) - #define _mm_set_pd1(a) simde_mm_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_abs_pd(simde__m128d a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - simde_float64 mask_; - uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); - simde_memcpy(&mask_, &u64_, sizeof(u64_)); - return _mm_and_pd(_mm_set1_pd(mask_), a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vabsq_f64(a_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_abs(a_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_not_pd(simde__m128d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castpd_si128(a); - return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - mask_ = simde__m128d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - #else - a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_pd(a); - #else - simde__m128d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 v = simde_math_round(a_.f64[0]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsd_si64x(a); - #else - return _mm_cvtsd_si64(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); - #endif -} -#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) - #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); - - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i]; - } - #endif - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_x_mm_cvtsi128_si16 (simde__m128i a) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s16(a_.neon_i16, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i16, 0); - #else - return a_.i16[0]; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi128_si32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi128_si32(a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s32(a_.neon_i32, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i32, 0); - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsi128_si64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsi128_si64x(a); - #else - return _mm_cvtsi128_si64(a); - #endif - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); - #endif - return a_.i64[0]; - #endif -} -#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) - #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_sd(a, b); - #else - simde__m128d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.i64[1] = a_.i64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cvtsi16_si128 (int16_t a) { - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); - #else - r_.i16[0] = a; - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - r_.i16[4] = 0; - r_.i16[5] = 0; - r_.i16[6] = 0; - r_.i16[7] = 0; - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi32_si128 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_si128(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_sd(a, b); - #else - return _mm_cvtsi64x_sd(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) - #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi64_si128 (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_si128(a); - #else - return _mm_cvtsi64x_si128(a); - #endif - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i64x2_make(a, 0); - #else - r_.i64[0] = a; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) - #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtss_sd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); - return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); - - return simde__m128d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvttpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvttpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = a_.f64[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvttpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - /* Values below INT32_MIN saturate anyways, so we don't need to - * test for that. 
*/ - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = - vandq_u32( - vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), - vceqq_f32(a_.neon_f32, a_.neon_f32) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); - #endif - - r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - v128_t valid_input = - wasm_v128_and( - wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), - wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); - #endif - - r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); - #endif - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; - - __typeof__(r_.i32) valid_input = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.i32), - (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) - ); - #elif !defined(SIMDE_FAST_NANS) - __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); - #endif - - __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; - r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); - #endif - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvttsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvttsd_si64(a); - #else - return _mm_cvttsd_si64x(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); - #endif -} -#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) - #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] / b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - uint16_t r; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); - #else - r = a_.u16[imm8 & 7]; - #endif - - return HEDLEY_STATIC_CAST(int32_t, r); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) - #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m128i_private a_ = simde__m128i_to_private(a); - a_.i16[imm8 & 7] = i; - return simde__m128i_from_private(a_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load1_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load1_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); - #else - return simde_mm_set1_pd(*mem_addr); - #endif -} -#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) - #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_sd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_sd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = UINT64_C(0); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_load_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadh_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); - #else - simde_float64 t; - - simde_memcpy(&t, mem_addr, sizeof(t)); - r_.f64[0] = a_.f64[0]; - r_.f64[1] = t; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_epi64(mem_addr); - #else - simde__m128i_private r_; - - int64_t value; - simde_memcpy(&value, mem_addr, sizeof(value)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); - #else - r_.i64[0] = value; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vld1_f64( - HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = a_.u64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadr_pd(mem_addr); - #else - simde__m128d_private - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); - r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t tmp = 
wasm_v128_load(mem_addr); - r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); - #else - r_.f64[0] = mem_addr[1]; - r_.f64[1] = mem_addr[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld1q_f64(mem_addr); - #else - simde__m128d_private r_; - - simde_memcpy(&r_, mem_addr, sizeof(r_)); - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi8 - #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi16 - #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi32(void const * mem_addr) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi32 - #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi64 - #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si128 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); - #else - simde__m128i_private r_; - - #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_PACKED_ - struct simde_mm_loadu_si128_s { - __typeof__(r_) v; - } __attribute__((__packed__, __may_alias__)); - r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_madd_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpaddq_s32(pl, ph); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); - int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); - int32x2_t rh = 
vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); - r_.neon_i32 = vcombine_s32(rl, rh); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) a32, b32, p32; - SIMDE_CONVERT_VECTOR_(a32, a_.i16); - SIMDE_CONVERT_VECTOR_(b32, b_.i16); - p32 = a32 * b32; - r_.i32 = - __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + - __builtin_shufflevector(p32, p32, 1, 3, 5, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - if (mask_.u8[i] & 0x80) { - mem_addr[i] = a_.i8[i]; - } - } - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) - /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ - return _mm_movemask_epi8(a); - #else - int32_t r = 0; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ - static const uint8_t md[16] = { - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - }; - - /* Extend sign bit over entire lane */ - uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); - /* Clear all but the bit we're interested in. 
*/ - uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); - /* Alternate bytes from low half and high half */ - uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); - uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vaddvq_u16(x); - #else - uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[15 - i] >> 7) << (15 - i); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_movemask_pd(a); - #else - int32_t r = 0; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + - (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 
-simde_mm_movepi64_pi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movepi64_pi64(a); - #else - simde__m64_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i64 = vget_low_s64(a_.neon_i64); - #else - r_.i64[0] = a_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_movpi64_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movpi64_epi64(a); - #else - simde__m128i_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
[... deletion of the vendored SIMDE SSE2 emulation header continues. The removed lines in this stretch provide portable implementations of _mm_max_sd, _mm_move_epi64, _mm_mul_epu32 / _mm_mul_pd / _mm_mul_sd / _mm_mul_su32, _mm_mulhi_epi16 / _mm_mulhi_epu16 / _mm_mullo_epi16, _mm_or_pd / _mm_or_si128, _mm_packs_epi16 / _mm_packs_epi32 / _mm_packus_epi16, _mm_pause, _mm_sad_epu8, the _mm_set* / _mm_setr* / _mm_set1* and _mm_loadu_si16 / _mm_loadu_si32 / _mm_loadu_si64 constructors, _mm_setzero_pd / _mm_undefined_pd / _mm_undefined_si128, the _mm_shuffle_epi32 / _mm_shuffle_pd / _mm_shufflehi_epi16 / _mm_shufflelo_epi16 shuffles, the _mm_sll / _mm_srl / _mm_sra register shifts and _mm_slli / _mm_srli / _mm_srai immediate shifts, and _mm_sqrt_pd / _mm_sqrt_sd, together with internal simde_x_mm_* helpers (mul_epi64, mod_epi64, set_epu8/16/32, set_epu64x, set1_epu8/16/32/64, setone_pd, setone_si128). Each function carries ARM NEON, WASM SIMD128, POWER AltiVec, and plain scalar fallbacks, followed by a SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES macro alias mapping the native intrinsic name onto the simde_* implementation. Every line in this hunk is a removal; the deleted hunk continues below. ...]
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sr(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); - #else - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u64( \ - ((imm8) > 63) ? \ - vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ - vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store1_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); - #else - mem_addr[0] = a_.f64[0]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) - #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_sd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); - simde_memcpy(mem_addr, &v, sizeof(v)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); - simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde_float64 v = a_.f64[0]; - simde_memcpy(mem_addr, &v, sizeof(simde_float64)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void - simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeh_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - *mem_addr = a_.f64[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int64_t tmp; - - /* memcpy to prevent aliasing, tmp because we can't take the - * address of a vector element. */ - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - tmp = vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - tmp = vec_extract(a_.altivec_i64, 0); - #else - tmp = a_.i64[0]; - #endif - - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_pd(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 tmp; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - tmp = vgetq_lane_f64(a_.neon_f64, 0); - #else - tmp = a_.f64[0]; - #endif - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storer_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #else - mem_addr[0] = a_.f64[1]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si16(mem_addr, a); - #else - int16_t val = simde_x_mm_cvtsi128_si16(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si32(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); - #else - int32_t val = simde_mm_cvtsi128_si32(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si64(mem_addr, a); - #else - int64_t val = simde_mm_cvtsi128_si64(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_pd(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_si128(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-void -simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_si32(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_s32(mem_addr, vdupq_n_s32(a), 0); - #else - *mem_addr = a; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) - _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s64(mem_addr, vdup_n_s64(a)); - #else - *mem_addr = a; - #endif -} -#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) - #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] - b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m64 -simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] - b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] == b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] == b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] >= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] >= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > 
wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] > b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] > b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] <= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] <= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] < b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] < b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] != b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] != b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_lfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_lfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_lfence() simde_mm_lfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_mfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_mfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mfence() simde_mm_mfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_high_s16(a_.neon_i16); - int16x4_t b1 = vget_high_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi16(a, b) 
simde_mm_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_high_s32(a_.neon_i32); - int32x2_t b1 = vget_high_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_h = vget_high_s64(a_.neon_i64); - int64x1_t b_h = vget_high_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_h, b_h); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi8 (simde__m128i a, 
simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i]; - r_.i8[(i * 2) + 1] = b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_low_s16(a_.neon_i16); - int16x4_t b1 = vget_low_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i]; - r_.i16[(i * 2) + 1] = b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_low_s32(a_.neon_i32); - int32x2_t b1 = vget_low_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i]; - r_.i32[(i * 2) + 1] = b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_l = vget_low_s64(a_.neon_i64); - int64x1_t b_l = vget_low_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_l, b_l); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i]; - r_.i64[(i * 2) + 1] = b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i]; - r_.f64[(i * 2) + 1] = b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_negate_pd(simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - r_.altivec_f64 = vec_neg(a_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vnegq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); - #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_not_si128 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_ternarylogic_epi32(a, a, a, 0x55); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/sse2.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i]; - r_.i16[i + halfway_point] = b_.i16[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i + 1]; - r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi32 
(simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i]; - r_.i32[i + halfway_point] = b_.i32[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i + 1]; - r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i]; - r_.f32[i + halfway_point] = b_.f32[2 * i]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i + 1]; - r_.f32[i + halfway_point] = 
b_.f32[2 * i + 1]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i]; - r_.f64[i + halfway_point] = b_.f64[2 * i]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i + 1]; - r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); - float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); - return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); - #else - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; - r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); - float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); - return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); - #else - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; - r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; - } - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_ps(a, b) simde_mm_addsub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_pd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); - #else - return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); - #else - return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_pd(a, b); - #else - return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); - #else - return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_lddqu_si128(mem_addr); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loaddup_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_loaddup_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(*mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.f64[1] = *mem_addr; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_movedup_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movedup_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - r_.f64[0] = a_.f64[0]; - r_.f64[1] = a_.f64[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehdup_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movehdup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); - #else - r_.f32[0] = a_.f32[1]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_moveldup_ps (simde__m128 a) { - #if defined(SIMDE__SSE3_NATIVE) - return _mm_moveldup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[0]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[2]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE3_H) */ -/* :: End simde/x86/sse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi8(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabsq_s8(a_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_abs(a_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / 
sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vabsq_s16(a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_abs(a_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); - return _mm_sub_epi32(_mm_xor_si128(a, m), m); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vabsq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_abs(a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_PUSH - #pragma warning(disable:4146) - #endif - r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_POP - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi8(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabs_s8(a_.neon_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? 
(- a_.i8[i]) : a_.i8[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi16 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi16(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vabs_s16(a_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi32 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi32(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vabs_s32(a_.neon_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? (- a_.i32[i]) : a_.i32[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - if (HEDLEY_UNLIKELY(count > 31)) - return simde_mm_setzero_si128(); - - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 31) { - r_.i8[i] = 0; - } else if (srcpos > 15) { - r_.i8[i] = a_.i8[(srcpos) & 15]; - } else { - r_.i8[i] = b_.i8[srcpos]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSSE3_NATIVE) - #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_alignr_epi8(a, b, count) \ - ( \ - ((count) > 31) \ - ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ - : ( \ - ((count) > 15) \ - ? 
(simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ - : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) -#endif -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) - #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) - SIMDE_REQUIRE_CONSTANT(count) { - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 15) { - r_.i8[i] = 0; - } else if (srcpos > 7) { - r_.i8[i] = a_.i8[(srcpos) & 7]; - } else { - r_.i8[i] = b_.i8[srcpos]; - } - } - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) -# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_alignr_pi8(a, b, count) \ - ( \ - ((count) > 15) \ - ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ - : ( \ - ((count) > 7) \ - ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ - : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) -#endif -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_shuffle_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Mask out the bits we're not interested in. vtbl will result in 0 - * for any values outside of [0, 15], so if the high bit is set it - * will return 0, just like in SSSE3. 
*/ - b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); - - /* Convert a from an int8x16_t to an int8x8x2_t */ - int8x8x2_t i; - i.val[0] = vget_low_s8(a_.neon_i8); - i.val[1] = vget_high_s8(a_.neon_i8); - - /* Table lookups */ - int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); - int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); - - r_.neon_i8 = vcombine_s8(l, h); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - /* This is a bit ugly because of the casts and the awful type - * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just - * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ - SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; - SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); - SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); - r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_swizzle( - a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); - } - #endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_shuffle_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); - r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); - #else - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadd_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); - #else - return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadd_epi32(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); - return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); - #else - return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadd_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); - #else - r_.i16[0] = a_.i16[0] + a_.i16[1]; - r_.i16[1] = a_.i16[2] + a_.i16[3]; - r_.i16[2] = b_.i16[0] + b_.i16[1]; - r_.i16[3] = b_.i16[2] + b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadd_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[0] + a_.i32[1]; - r_.i32[1] = b_.i32[0] + b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadds_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); - #else - return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadds_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); - #else - for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); - r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; - int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); - r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsub_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); - #else - return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsub_epi32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); - return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); - #else - return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsub_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); - #else - r_.i16[0] = a_.i16[0] - a_.i16[1]; - r_.i16[1] = a_.i16[2] - a_.i16[3]; - r_.i16[2] = b_.i16[0] - b_.i16[1]; - r_.i16[3] = b_.i16[2] - b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsub_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - - SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[0] - a_.i32[1]; - r_.i32[1] = b_.i32[0] - b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsubs_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); - #else - return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); - #else - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); - r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_maddubs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Zero extend a */ - int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); - int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); - - /* Sign extend by shifting left then shifting right. */ - int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); - int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); - - /* multiply */ - int16x8_t prod1 = vmulq_s16(a_even, b_even); - int16x8_t prod2 = vmulq_s16(a_odd, b_odd); - - /* saturated add */ - r_.neon_i16 = vqaddq_s16(prod1, prod2); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_maddubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); - int16x8_t bi = vmovl_s8(b_.neon_i8); - int16x8_t p = vmulq_s16(ai, bi); - int16x4_t l = vget_low_s16(p); - int16x4_t h = vget_high_s16(p); - r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_mulhrs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), - vget_low_s16(b_.neon_i16)); - int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), - vget_high_s16(b_.neon_i16)); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); - int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); - - /* Join together */ - r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); - v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); - const v128_t __inc = wasm_i32x4_splat(0x4000); - __lo = wasm_i32x4_add(__lo, __inc); - __hi = wasm_i32x4_add(__hi, __inc); - __lo = wasm_i32x4_add(__lo, __lo); - __hi = wasm_i32x4_add(__hi, __hi); - r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhrs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - 
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow = vrshrn_n_s32(mul, 15); - - /* Join together */ - r_.neon_i16 = narrow; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); - uint8x16_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s8(b_.neon_i8); - #else - bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); - #endif - bnz_mask = vmvnq_u8(bnz_mask); - - r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); - simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); - uint16x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s16(b_.neon_i16); - #else - bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); - #endif - bnz_mask = vmvnq_u16(bnz_mask); - - r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); - simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); - uint32x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s32(b_.neon_i32); - #else - bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); - #endif - bnz_mask = vmvnq_u32(bnz_mask); - - r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); - simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); - uint8x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s8(b_.neon_i8); - #else - bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); - #endif - bnz_mask = vmvn_u8(bnz_mask); - - r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); - uint16x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s16(b_.neon_i16); - #else - bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); - #endif - bnz_mask = vmvn_u16(bnz_mask); - - r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); - uint32x2_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s32(b_.neon_i32); - #else - bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); - #endif - bnz_mask = vmvn_u32(bnz_mask); - - r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/ssse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) -# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_epi16(a, b, imm8) \ - (__extension__ ({ \ - simde__m128i_private \ - simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ - simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ - simde_mm_blend_epi16_r_; \ - \ - simde_mm_blend_epi16_r_.i16 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 16, 16, \ - simde_mm_blend_epi16_a_.i16, \ - simde_mm_blend_epi16_b_.i16, \ - ((imm8) & (1 << 0)) ? 8 : 0, \ - ((imm8) & (1 << 1)) ? 9 : 1, \ - ((imm8) & (1 << 2)) ? 10 : 2, \ - ((imm8) & (1 << 3)) ? 11 : 3, \ - ((imm8) & (1 << 4)) ? 12 : 4, \ - ((imm8) & (1 << 5)) ? 13 : 5, \ - ((imm8) & (1 << 6)) ? 14 : 6, \ - ((imm8) & (1 << 7)) ? 15 : 7 \ - ); \ - \ - simde__m128i_from_private(simde_mm_blend_epi16_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_epi16 - #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; - } - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_pd(a, b, imm8) \ - (__extension__ ({ \ - simde__m128d_private \ - simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ - simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ - simde_mm_blend_pd_r_; \ - \ - simde_mm_blend_pd_r_.f64 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 64, 16, \ - simde_mm_blend_pd_a_.f64, \ - simde_mm_blend_pd_b_.f64, \ - ((imm8) & (1 << 0)) ? 2 : 0, \ - ((imm8) & (1 << 1)) ? 3 : 1 \ - ); \ - \ - simde__m128d_from_private(simde_mm_blend_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_pd - #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_ps(a, b, imm8) \ - (__extension__ ({ \ - simde__m128_private \ - simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ - simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ - simde_mm_blend_ps_r_; \ - \ - simde_mm_blend_ps_r_.f32 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 32, 16, \ - simde_mm_blend_ps_a_.f32, \ - simde_mm_blend_ps_b_.f32, \ - ((imm8) & (1 << 0)) ? 4 : 0, \ - ((imm8) & (1 << 1)) ? 5 : 1, \ - ((imm8) & (1 << 2)) ? 6 : 2, \ - ((imm8) & (1 << 3)) ? 
7 : 3 \ - ); \ - \ - simde__m128_from_private(simde_mm_blend_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_ps - #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_epi8(a, b, mask); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); - return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Use a signed shift right to create a mask with the sign bit */ - mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); - r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); - #else - mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; - #endif - - r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int8_t m = mask_.i8[i] >> 7; - r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_epi8 - #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE2_NATIVE) - mask = simde_mm_srai_epi16(mask, 15); - return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); - r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i16 = mask_.i16 < z; - #else - mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; - #endif - - r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int16_t m = mask_.i16[i] >> 15; - r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; - mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); - #else - mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; - #endif - - r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - int32_t m = mask_.i32[i] >> 31; - r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i64) z = { 0, 0 }; - mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); - #else - mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; - #endif - - r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - int64_t m = mask_.i64[i] >> 63; - r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_pd - #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_ps - #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_pd (simde__m128d a, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - /* For architectures which lack a current direction SIMD instruction. */ - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndiq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyint) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_nearbyint(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndaq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_roundeven) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_roundeven(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndmq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_floor(a_.f64[i]); - } - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndpq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); - #elif defined(simde_math_ceil) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_ceil(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_trunc(a_.f64[i]); - } - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_pd - #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_pd - #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ps - #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_sd - #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_ss(a, 
b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ss - #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cmpeq_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ - uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); - uint32x4_t swapped = vrev64q_u32(cmp); - r_.neon_u32 = vandq_u32(cmp, swapped); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpeq_epi64 - #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_i16 = s16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, 0, -1, 1, -1, 2, -1, 3, - -1, 4, -1, 5, -1, 6, -1, 7)); - r_.i16 >>= 8; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi16 - #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) 
- __m128i tmp = _mm_unpacklo_epi8(a, a); - tmp = _mm_unpacklo_epi16(tmp, tmp); - return _mm_srai_epi32(tmp, 24); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ - r_.neon_i32 = s32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, 0, -1, -1, -1, 1, - -1, -1, -1, 2, -1, -1, -1, 3)); - r_.i32 >>= 24; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi32 - #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); - r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - /* Disabled on x86 due to lack of 64-bit arithmetic shift until - * until AVX-512 (at which point we would be using the native - * _mm_cvtepi_epi64 anyways). 
*/ - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, -1, -1, -1, -1, 0, - -1, -1, -1, -1, -1, -1, -1, 1)); - r_.i64 >>= 56; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi64 - #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_u16 = u16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 16, 1, 17, 2, 18, 3, 19, - 4, 20, 5, 21, 6, 22, 7, 23)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi16 - #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi32(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ - r_.neon_u32 = u32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 
18, 19, 1, 21, 22, 23, - 2, 25, 26, 27, 3, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi32 - #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi64(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 18, 19, 20, 21, 22, 23, - 1, 25, 26, 27, 28, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi64 - #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); - r_.i32 >>= 16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi32 - #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 1, 11, 2, 13, 3, 15)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi32 - #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 10, 11, - 1, 13, 14, 15)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi64 - #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, - 8, 9, 10, 0, - 12, 13, 14, 1)); - r_.i64 >>= 48; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi64 - #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i tmp = _mm_shuffle_epi32(a, 0x50); - tmp = _mm_srai_epi32(tmp, 31); - tmp = _mm_shuffle_epi32(tmp, 0xed); - return _mm_unpacklo_epi32(a, tmp); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); - r_.i64 >>= 32; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi32_epi64 - #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); - #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u32) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu32_epi64 - #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - - switch (imm8) { - case 0xff: - r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); - break; - case 0x13: - r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); - break; - default: - { /* imm8 is a compile-time constant, so this all becomes just a load */ - uint64_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - - r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); - - { - uint64_t mask_data[] = { - (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - break; - } - #else - simde_float64 sum = SIMDE_FLOAT64_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; - } - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_pd - #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - - switch (imm8) { - case 0xff: - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - case 0x7f: - r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - default: - { - { - uint32_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - - { - uint32_t mask_data[] = { - (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - } - break; - } - #else - simde_float32 sum = SIMDE_FLOAT32_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); - } - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(HEDLEY_MCST_LCC_VERSION) - #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ - SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ - _mm_dp_ps((a), (b), (imm8)); \ - SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ - })) - #else - #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_ps - #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) -#endif - -#if defined(simde_mm_extract_epi8) -# undef simde_mm_extract_epi8 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_mm_extract_epi8 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i8, imm8); - #else - return a_.i8[imm8 & 15]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) -# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi8 - #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) -#endif - -#if defined(simde_mm_extract_epi32) -# undef simde_mm_extract_epi32 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i32, imm8); - #else - return a_.i32[imm8 & 3]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi32 - #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) -#endif - -#if defined(simde_mm_extract_epi64) -# undef simde_mm_extract_epi64 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_extract_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i64, imm8); - #else - return a_.i64[imm8 & 1]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) -#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_extract_epi64 - #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) -#endif - -#if defined(simde_mm_extract_ps) -# undef simde_mm_extract_ps -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128_private - a_ = simde__m128_to_private(a); - - return a_.i32[imm8 & 3]; -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_ps - #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_pd - #define _mm_floor_pd(a) simde_mm_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ps - #define _mm_floor_ps(a) simde_mm_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_floor) - r_.f64[0] = simde_math_floor(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_sd - #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_floor_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_floorf) - r_.f32[0] = simde_math_floorf(b_.f32[0]); - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ss - #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - /* clang-3.8 returns an incompatible type, so we need the cast. MSVC - * can't handle the cast ("error C2440: 'type cast': cannot convert - * from '__m128i' to '__m128i'"). */ - #if defined(__clang__) - #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) - #else - #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi8 - #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(__clang__) - #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) - #else - #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi32 - #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - #if defined(SIMDE_BUG_GCC_94482) - simde__m128i_private - a_ = simde__m128i_to_private(a); - - switch(imm8) { - case 0: - return simde_mm_set_epi64x(a_.i64[1], i); - break; - case 1: - return simde_mm_set_epi64x(i, a_.i64[0]); - break; - default: - HEDLEY_UNREACHABLE(); - break; - } - #else - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i64[imm8] = i; - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_insert_epi64 - #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - float tmp1_ = b_.f32[(imm8 >> 6) & 3]; - a_.f32[(imm8 >> 4) & 3] = tmp1_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_ps - #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi8(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi8 - #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi32(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi32 - #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_add_epi16(b, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu16 - #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu32 - #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi8 - #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi32 - #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu16 - #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu32 - #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_minpos_epu16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_minpos_epu16(a); - #else - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()), - a_ = simde__m128i_to_private(a); - - r_.u16[0] = UINT16_MAX; - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - if (a_.u16[i] < r_.u16[0]) { - r_.u16[0] = a_.u16[i]; - r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); - } - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_minpos_epu16 - #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - const int a_offset = imm8 & 4; - const int b_offset = (imm8 & 3) << 2; - -#if defined(simde_math_abs) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { - r_.u16[i] = - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) -# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mpsadbw_epu8 - #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mul_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // vmull_s32 upcasts instead of masking, so we downcast. 
- int32x2_t a_lo = vmovn_s64(a_.neon_i64); - int32x2_t b_lo = vmovn_s64(b_.neon_i64); - r_.neon_i64 = vmull_s32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make( - wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), - wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = - HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * - HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mul_epi32 - #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mullo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mullo_epi32 - #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 * b_.u32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] * b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_packus_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i max = _mm_set1_epi32(UINT16_MAX); - const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); - const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); - return - _mm_packs_epi32( - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) - ); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); - #else - r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = - 
[... remainder of the deleted vendored SIMDE header elided. This hunk contains no package code; every line is a removal (`-`) from the bundled, auto-generated SIMDE amalgamated header that previously provided x86 intrinsic emulation. The deleted material comprises: the tail of the SSE4.1 shims (simde_mm_packus_epi32, simde_mm_round_sd/ss, simde_mm_stream_load_si128, the _mm_test* family); the SSE4.2 shims (the _SIDD_* constants, _mm_cmpestrs/cmpestrz, _mm_cmpgt_epi64, _mm_cmpistrs/cmpistrz, and the _mm_crc32_u8/u16/u32/u64 helpers); and the opening of the AVX shims (the simde__m256 / simde__m256d / simde__m256i private unions and type aliases, the SIMDE_CMP_* comparison constants, and the _mm256_cast*, _mm256_setzero*, _mm256_set*, _mm256_set1*, blend/select/abs helpers, and _mm256_add_ps / _mm256_hadd_ps / _mm256_add_pd emulations). The entire vendored portability layer is removed wholesale because the Armadillo port no longer includes any SIMDE headers. ...]
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hadd_pd(a, b); - #else - return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_pd - #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_addsub_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; - r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_addsub_ps - #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_addsub_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; - r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_addsub_pd - #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_and_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_and_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_ps - #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_and_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_and_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; 
i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_pd - #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_andnot_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_ps - #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_andnot_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_pd - #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ - simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_ps - #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; - } - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_pd(a, b, imm8) \ - simde_mm256_set_m128d( \ - simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ - simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_pd - #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_ps(a, b, mask); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - mask_ = simde__m256_to_private(mask); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); - r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blendv_ps - #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_pd(a, b, mask); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - mask_ = simde__m256d_to_private(mask); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); - r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blendv_pd - #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_pd(mem_addr); - #else - simde__m256d_private r_; - - simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); - r_.m128d[0] = tmp; - r_.m128d[1] = tmp; - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_pd - #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_ps(mem_addr); - #else - simde__m256_private r_; - - simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); - r_.m128[0] = tmp; - r_.m128[1] = tmp; - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_ps - #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcast_sd (simde_float64 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_sd(a); - #else - return simde_mm256_set1_pd(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_sd - #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_broadcast_ss (simde_float32 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_broadcast_ss(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); - #else - return simde_mm_set1_ps(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcast_ss - #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcast_ss (simde_float32 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_ss(a); - #else - return simde_mm256_set1_ps(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_ss - #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castpd128_pd256 (simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd128_pd256(a); - #else - simde__m256d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - r_.m128d_private[0] = a_; - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd128_pd256 - #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm256_castpd256_pd128 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd256_pd128(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - return a_.m128d[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd256_pd128 - #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castps128_ps256 (simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps128_ps256(a); - #else - simde__m256_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - r_.m128_private[0] = a_; - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps128_ps256 - #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_castps256_ps128 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps256_ps128(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - return a_.m128[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps256_ps128 - #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castsi128_si256 (simde__m128i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi128_si256(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - r_.m128i_private[0] = a_; - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi128_si256 - #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_castsi256_si128 (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_si128(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_si128 - #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_round_ps (simde__m256 a, const int rounding) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyintf) - case SIMDE_MM_FROUND_CUR_DIRECTION: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_roundf) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_floorf) - case SIMDE_MM_FROUND_TO_NEG_INF: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_ceilf) - case SIMDE_MM_FROUND_TO_POS_INF: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_truncf) - case SIMDE_MM_FROUND_TO_ZERO: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256_private \ - 
simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ - simde_mm256_round_ps_a_ = simde__m256_to_private(a); \ - \ - for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \ - simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \ - } \ - \ - simde__m256_from_private(simde_mm256_round_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_round_ps - #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_round_pd (simde__m256d a, const int rounding) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyint) - case SIMDE_MM_FROUND_CUR_DIRECTION: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_nearbyint(a_.f64[i]); - } - break; - #endif - - #if defined(simde_math_round) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_round(a_.f64[i]); - } - break; - #endif - - #if defined(simde_math_floor) - case SIMDE_MM_FROUND_TO_NEG_INF: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_floor(a_.f64[i]); - } - break; - #endif - - #if defined(simde_math_ceil) - case SIMDE_MM_FROUND_TO_POS_INF: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_ceil(a_.f64[i]); - } - break; - #endif - - #if defined(simde_math_trunc) - case SIMDE_MM_FROUND_TO_ZERO: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_trunc(a_.f64[i]); - } - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256d_private \ - simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ - simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \ - \ - for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \ - simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \ - } \ - \ - simde__m256d_from_private(simde_mm256_round_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_round_pd - #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_ceil_pd (simde__m256d a) { - return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_ceil_pd - #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_ceil_ps (simde__m256 a) { - return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef 
_mm256_ceil_ps - #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL - -/* This implementation does not support signaling NaNs (yet?) */ -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - switch (imm8) { - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b)); - break; - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - return simde_mm_cmpeq_pd(a, b); - break; - case SIMDE_CMP_NGE_US: - case SIMDE_CMP_NGE_UQ: - return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b)); - break; - case SIMDE_CMP_LT_OS: - case SIMDE_CMP_LT_OQ: - return simde_mm_cmplt_pd(a, b); - break; - case SIMDE_CMP_NGT_US: - case SIMDE_CMP_NGT_UQ: - return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b)); - break; - case SIMDE_CMP_LE_OS: - case SIMDE_CMP_LE_OQ: - return simde_mm_cmple_pd(a, b); - break; - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - return simde_mm_cmpneq_pd(a, b); - break; - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b)); - break; - case SIMDE_CMP_NLT_US: - case SIMDE_CMP_NLT_UQ: - return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b)); - break; - case SIMDE_CMP_GE_OS: - case SIMDE_CMP_GE_OQ: - return simde_mm_cmpge_pd(a, b); - break; - case SIMDE_CMP_NLE_US: - case SIMDE_CMP_NLE_UQ: - return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b)); - break; - case SIMDE_CMP_GT_OS: - case SIMDE_CMP_GT_OQ: - return simde_mm_cmpgt_pd(a, b); - break; - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - return simde_mm_setzero_pd(); - break; - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - return simde_x_mm_setone_pd(); - break; - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - return simde_mm_cmpunord_pd(a, b); - break; - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - return simde_mm_cmpord_pd(a, b); - break; - } - - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); -} -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \ - simde__m128d simde_mm_cmp_pd_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \ - break; \ - default: \ - simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \ - break; \ - } \ - simde_mm_cmp_pd_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_pd - #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - switch (imm8) { - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b)); - break; - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - return simde_mm_cmpeq_ps(a, b); - break; - case SIMDE_CMP_NGE_US: - case SIMDE_CMP_NGE_UQ: - return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b)); - break; - case SIMDE_CMP_LT_OS: - case SIMDE_CMP_LT_OQ: - return simde_mm_cmplt_ps(a, b); - break; - case SIMDE_CMP_NGT_US: - case SIMDE_CMP_NGT_UQ: - return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b)); - 
break; - case SIMDE_CMP_LE_OS: - case SIMDE_CMP_LE_OQ: - return simde_mm_cmple_ps(a, b); - break; - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - return simde_mm_cmpneq_ps(a, b); - break; - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); - break; - case SIMDE_CMP_NLT_US: - case SIMDE_CMP_NLT_UQ: - return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); - break; - case SIMDE_CMP_GE_OS: - case SIMDE_CMP_GE_OQ: - return simde_mm_cmpge_ps(a, b); - break; - case SIMDE_CMP_NLE_US: - case SIMDE_CMP_NLE_UQ: - return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); - break; - case SIMDE_CMP_GT_OS: - case SIMDE_CMP_GT_OQ: - return simde_mm_cmpgt_ps(a, b); - break; - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - return simde_mm_setzero_ps(); - break; - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - return simde_x_mm_setone_ps(); - break; - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - return simde_mm_cmpunord_ps(a, b); - break; - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - return simde_mm_cmpord_ps(a, b); - break; - } - - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); -} -/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false - * comparisons, but only when AVX-512 is enabled. */ -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ - simde__m128 simde_mm_cmp_ps_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ - break; \ - default: \ - simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ - break; \ - } \ - simde_mm_cmp_ps_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_ps - #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - a_.i64[0] = INT64_C(0); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - a_.i64[0] = ~INT64_C(0); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m128d_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_sd - #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - a_.i32[0] = INT32_C(0); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - a_.i32[0] = ~INT32_C(0); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m128_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_ss - #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m256d -#if defined(__clang__) && defined(__AVX512DQ__) -simde_mm256_cmp_pd_internal_ -#else -simde_mm256_cmp_pd -#endif -(simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m256d_from_private(r_); -} -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ - simde__m256d simde_mm256_cmp_pd_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ - break; \ - default: \ - simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ - break; \ - } \ - simde_mm256_cmp_pd_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_pd - #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m256 -#if defined(__clang__) && defined(__AVX512DQ__) -simde_mm256_cmp_ps_internal_ -#else -simde_mm256_cmp_ps -#endif -(simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m256_to_private(simde_mm256_setzero_ps()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
[Remainder of the deleted vendored SIMDE AVX emulation header omitted. The removed hunks in this stretch continue through the simde_mm256_*/simde_mm_* AVX shims — cmp, copysign, cvtepi32/cvtpd/cvtps/cvtsd/cvtsi256/cvtss/cvttpd/cvttps, div, dp, extract/extractf128, floor, insert/insertf128, lddqu, load/loadu/loadu2, maskload/maskstore, min/max, movedup/movehdup/moveldup, movemask, mul, or, permute/permutevar/permute2f128, rcp, rsqrt, setr_*, shuffle, sqrt, and store/storeu — all dropped wholesale with the rest of the vendored SIMDE sources; the deletion continues past the end of this excerpt.]
defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_si256 - #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128(hi_addr, lo_addr, a); - #else - simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); - simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128 - #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128d(hi_addr, lo_addr, a); - #else - simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); - simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128d - #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128i(hi_addr, lo_addr, a); - #else - simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); - simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128i - #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_ps - #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_pd - #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_si256(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_si256 - #define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_ps - #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_ps(a, b); - #else - return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_ps - #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_pd - #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_pd(a, b); - #else - return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_pd - #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_undefined_ps (void) { - simde__m256_private r_; - -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || 
HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_ps(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256_to_private(simde_mm256_setzero_ps()); -#endif - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_ps - #define _mm256_undefined_ps() simde_mm256_undefined_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_undefined_pd (void) { - simde__m256d_private r_; - -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_pd(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); -#endif - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_pd - #define _mm256_undefined_pd() simde_mm256_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_undefined_si256 (void) { - simde__m256i_private r_; -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_si256(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_si256 - #define _mm256_undefined_si256() simde_mm256_undefined_si256() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_xor_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_ps - #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_xor_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] ^ b_.u64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_pd - #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_xorsign_ps(simde__m256 dest, 
simde__m256 src) { - return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { - return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_negate_ps(simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_negate_pd(simde__m256d a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpackhi_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - r_.f32[4] = a_.f32[6]; - r_.f32[5] = b_.f32[6]; - r_.f32[6] = a_.f32[7]; - r_.f32[7] = b_.f32[7]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_ps - #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpackhi_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); - #else - r_.f64[0] = a_.f64[1]; - r_.f64[1] = b_.f64[1]; - r_.f64[2] = a_.f64[3]; - r_.f64[3] = b_.f64[3]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_pd - #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpacklo_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - r_.f32[4] = a_.f32[4]; - r_.f32[5] = b_.f32[4]; - r_.f32[6] = a_.f32[5]; - r_.f32[7] = b_.f32[5]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_ps - #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpacklo_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); - #else - r_.f64[0] = a_.f64[0]; - r_.f64[1] = b_.f64[0]; - r_.f64[2] = a_.f64[2]; - r_.f64[3] = b_.f64[2]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_pd - #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_zextps128_ps256 (simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); - #else - simde__m256_private r_; - - r_.m128_private[0] = simde__m128_to_private(a); - r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_zextps128_ps256 - #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_zextpd128_pd256 (simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); - #else - simde__m256d_private r_; - - r_.m128d_private[0] = simde__m128d_to_private(a); - r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_zextpd128_pd256 - #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_zextsi128_si256 (simde__m128i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); - #else - simde__m256i_private r_; - - r_.m128i_private[0] = simde__m128i_to_private(a); - r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_zextsi128_si256 - #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testc_ps(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); - m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); - m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - return wasm_i32x4_extract_lane(m, 0); - #else - uint_fast32_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= ~a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_ps - #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_pd (simde__m128d a, simde__m128d b) { - 
#if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testc_pd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); - return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); - #else - uint_fast64_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= ~a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_pd - #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testc_ps(a, b); - #else - uint_fast32_t r = 0; - simde__m256_private - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= ~a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testc_ps - #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testc_pd(a, b); - #else - uint_fast64_t r = 0; - simde__m256d_private - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= ~a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testc_pd - #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testc_si256(a, b); - #else - int_fast32_t r = 0; - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= ~a_.i32f[i] & b_.i32f[i]; - } - - return HEDLEY_STATIC_CAST(int, !r); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testc_si256 - #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testz_ps(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); - m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); - m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - return wasm_i32x4_extract_lane(m, 0); - #else - uint_fast32_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_ps - #define 
_mm_testz_ps(a, b) simde_mm_testz_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testz_pd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); - return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); - #else - uint_fast64_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_pd - #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testz_ps(a, b); - #else - uint_fast32_t r = 0; - simde__m256_private - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testz_ps - #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testz_pd(a, b); - #else - uint_fast64_t r = 0; - simde__m256d_private - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testz_pd - #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testz_si256(a, b); - #else - int_fast32_t r = 0; - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= a_.i32f[i] & b_.i32f[i]; - } - - r = !r; - #endif - - return HEDLEY_STATIC_CAST(int, r); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testz_si256 - #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testnzc_ps(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); - v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); - m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); - m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); - m = 
wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); - #else - uint32_t rz = 0, rc = 0; - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - rc |= ~a_.u32[i] & b_.u32[i]; - rz |= a_.u32[i] & b_.u32[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_ps - #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testnzc_pd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); - v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); - return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) - & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); - #else - uint64_t rc = 0, rz = 0; - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - rc |= ~a_.u64[i] & b_.u64[i]; - rz |= a_.u64[i] & b_.u64[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_pd - #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testnzc_ps(a, b); - #else - uint32_t rc = 0, rz = 0; - simde__m256_private - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - rc |= ~a_.u32[i] & b_.u32[i]; - rz |= a_.u32[i] & b_.u32[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testnzc_ps - #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testnzc_pd(a, b); - #else - uint64_t rc = 0, rz = 0; - simde__m256d_private - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - rc |= ~a_.u64[i] & b_.u64[i]; - rz |= a_.u64[i] & b_.u64[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testnzc_pd - #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testnzc_si256(a, b); - #else - int32_t rc = 0, rz = 0; - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - rc |= ~a_.i32f[i] & b_.i32f[i]; - rz |= a_.i32f[i] & b_.i32f[i]; - } - - return !!(rc & rz); - #endif 
-} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testnzc_si256 - #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX_H) */ -/* :: End simde/x86/avx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* The problem is that Microsoft doesn't support 64-byte aligned parameters, except for - * __m512/__m512i/__m512d. Since our private union has an __m512 member it will be 64-byte - * aligned even if we reduce the alignment requirements of other members. - * - * Even if we're on x86 and use the native AVX-512 types for arguments/return values, the - * to/from private functions will break, and I'm not willing to change their APIs to use - * pointers (which would also require more verbose code on the caller side) just to make - * MSVC happy. - * - * If you want to use AVX-512 in SIMDe, you'll need to either upgrade to MSVC 2017 or later, - * or upgrade to a different compiler (clang-cl, perhaps?). If you have an idea of how to - * fix this without requiring API changes (except transparently through macros), patches - * are welcome. - */ - -# if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0) -# if defined(SIMDE_X86_AVX512F_NATIVE) -# undef SIMDE_X86_AVX512F_NATIVE -# pragma message("Native AVX-512 support requires MSVC 2017 or later. See comment above (in code) for details.") -# endif -# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_32 -# else -# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_64 -# endif - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_AVX512BF16_NATIVE) - SIMDE_ALIGN_TO_16 __m128bh n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - 
SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128bh_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; - SIMDE_ALIGN_TO_32 simde__m128 m128[2]; - - #if defined(SIMDE_X86_BF16_NATIVE) - SIMDE_ALIGN_TO_32 __m256bh n; - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256bh_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - SIMDE_AVX512_ALIGN int8_t i8[64]; - SIMDE_AVX512_ALIGN int16_t i16[32]; - SIMDE_AVX512_ALIGN int32_t i32[16]; - SIMDE_AVX512_ALIGN int64_t i64[8]; - SIMDE_AVX512_ALIGN uint8_t u8[64]; - SIMDE_AVX512_ALIGN uint16_t u16[32]; - SIMDE_AVX512_ALIGN uint32_t u32[16]; - SIMDE_AVX512_ALIGN uint64_t u64[8]; - SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; - SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128[4]; - SIMDE_AVX512_ALIGN simde_uint128 u128[4]; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32[16]; - SIMDE_AVX512_ALIGN simde_float64 f64[8]; - #endif - - SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; - SIMDE_AVX512_ALIGN simde__m128 m128[4]; - SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; - SIMDE_AVX512_ALIGN simde__m256 m256[2]; - - #if defined(SIMDE_X86_AVX512BF16_NATIVE) - SIMDE_AVX512_ALIGN __m512bh n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; - #endif - #endif -} simde__m512bh_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - SIMDE_AVX512_ALIGN int8_t i8[64]; - SIMDE_AVX512_ALIGN int16_t i16[32]; - SIMDE_AVX512_ALIGN int32_t i32[16]; - SIMDE_AVX512_ALIGN int64_t i64[8]; - SIMDE_AVX512_ALIGN uint8_t u8[64]; - SIMDE_AVX512_ALIGN uint16_t u16[32]; - SIMDE_AVX512_ALIGN uint32_t u32[16]; - SIMDE_AVX512_ALIGN uint64_t u64[8]; - SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; - SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128[4]; - SIMDE_AVX512_ALIGN simde_uint128 u128[4]; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32[16]; - SIMDE_AVX512_ALIGN simde_float64 f64[8]; - #endif - - SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; - SIMDE_AVX512_ALIGN simde__m128 m128[4]; - SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; - SIMDE_AVX512_ALIGN simde__m256 m256[2]; - - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_AVX512_ALIGN __m512 n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; - #endif - #endif -} simde__m512_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) 
SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - SIMDE_AVX512_ALIGN int8_t i8[64]; - SIMDE_AVX512_ALIGN int16_t i16[32]; - SIMDE_AVX512_ALIGN int32_t i32[16]; - SIMDE_AVX512_ALIGN int64_t i64[8]; - SIMDE_AVX512_ALIGN uint8_t u8[64]; - SIMDE_AVX512_ALIGN uint16_t u16[32]; - SIMDE_AVX512_ALIGN uint32_t u32[16]; - SIMDE_AVX512_ALIGN uint64_t u64[8]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128[4]; - SIMDE_AVX512_ALIGN simde_uint128 u128[4]; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32[16]; - SIMDE_AVX512_ALIGN simde_float64 f64[8]; - SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; - SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_AVX512_ALIGN simde__m128d_private m128d_private[4]; - SIMDE_AVX512_ALIGN simde__m128d m128d[4]; - SIMDE_AVX512_ALIGN simde__m256d_private m256d_private[2]; - SIMDE_AVX512_ALIGN simde__m256d m256d[2]; - - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_AVX512_ALIGN __m512d n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; - #endif - #endif -} simde__m512d_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - SIMDE_AVX512_ALIGN simde_float16 f16[32]; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32 
SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - SIMDE_AVX512_ALIGN int8_t i8[64]; - SIMDE_AVX512_ALIGN int16_t i16[32]; - SIMDE_AVX512_ALIGN int32_t i32[16]; - SIMDE_AVX512_ALIGN int64_t i64[8]; - SIMDE_AVX512_ALIGN uint8_t u8[64]; - SIMDE_AVX512_ALIGN uint16_t u16[32]; - SIMDE_AVX512_ALIGN uint32_t u32[16]; - SIMDE_AVX512_ALIGN uint64_t u64[8]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128[4]; - SIMDE_AVX512_ALIGN simde_uint128 u128[4]; - #endif - SIMDE_AVX512_ALIGN simde_float16 f16[32]; - SIMDE_AVX512_ALIGN simde_float32 f32[16]; - SIMDE_AVX512_ALIGN simde_float64 f64[8]; - SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; - SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_AVX512_ALIGN simde__m128d_private m128d_private[4]; - SIMDE_AVX512_ALIGN simde__m128d m128d[4]; - SIMDE_AVX512_ALIGN simde__m256d_private m256d_private[2]; - SIMDE_AVX512_ALIGN simde__m256d m256d[2]; - - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - SIMDE_AVX512_ALIGN __m512h n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; - #endif - #endif -} simde__m512h_private; - - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - SIMDE_AVX512_ALIGN int8_t i8[64]; - SIMDE_AVX512_ALIGN int16_t i16[32]; - SIMDE_AVX512_ALIGN int32_t i32[16]; - SIMDE_AVX512_ALIGN int64_t i64[8]; - SIMDE_AVX512_ALIGN uint8_t u8[64]; - SIMDE_AVX512_ALIGN uint16_t u16[32]; - SIMDE_AVX512_ALIGN uint32_t u32[16]; - SIMDE_AVX512_ALIGN uint64_t u64[8]; - 
SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; - SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128[4]; - SIMDE_AVX512_ALIGN simde_uint128 u128[4]; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32[16]; - SIMDE_AVX512_ALIGN simde_float64 f64[8]; - #endif - - SIMDE_AVX512_ALIGN simde__m128i_private m128i_private[4]; - SIMDE_AVX512_ALIGN simde__m128i m128i[4]; - SIMDE_AVX512_ALIGN simde__m256i_private m256i_private[2]; - SIMDE_AVX512_ALIGN simde__m256i m256i[2]; - - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_AVX512_ALIGN __m512i n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; - #endif - #endif -} simde__m512i_private; - -/* Intel uses the same header (immintrin.h) for everything AVX and - * later. If native aliases are enabled, and the machine has native - * support for AVX imintrin.h will already have been included, which - * means simde__m512* will already have been defined. So, even - * if the machine doesn't support AVX512F we need to use the native - * type; it has already been defined. - * - * However, we also can't just assume that including immintrin.h does - * actually define these. It could be a compiler which supports AVX - * but not AVX512F, such as GCC < 4.9 or VS < 2017. That's why we - * check to see if _MM_CMPINT_GE is defined; it's part of AVX512F, - * so we assume that if it's present AVX-512F has already been - * declared. - * - * Note that the choice of _MM_CMPINT_GE is deliberate; while GCC - * uses the preprocessor to define all the _MM_CMPINT_* members, - * in most compilers they are simply normal enum members. However, - * all compilers I've looked at use an object-like macro for - * _MM_CMPINT_GE, which is defined to _MM_CMPINT_NLT. _MM_CMPINT_NLT - * is included in case a compiler does the reverse, though I haven't - * run into one which does. - * - * As for the ICC check, unlike other compilers, merely using the - * AVX-512 types causes ICC to generate AVX-512 instructions. 
*/ -#if (defined(_MM_CMPINT_GE) || defined(_MM_CMPINT_NLT)) && \ - (defined(SIMDE_X86_AVX512F_NATIVE) || \ - !(defined(HEDLEY_INTEL_VERSION) || (defined(HEDLEY_MSVC_VERSION) && !defined(__clang__)))) - typedef __m512 simde__m512; - typedef __m512i simde__m512i; - typedef __m512d simde__m512d; - - typedef __mmask8 simde__mmask8; - typedef __mmask16 simde__mmask16; -#else - #if defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m512 SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - typedef int_fast32_t simde__m512i SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m512d SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - typedef simde__m512_private simde__m512; - typedef simde__m512i_private simde__m512i; - typedef simde__m512d_private simde__m512d; - #endif - - typedef uint8_t simde__mmask8; - typedef uint16_t simde__mmask16; -#endif - -#if (defined(_AVX512BF16INTRIN_H_INCLUDED) || defined(__AVX512BF16INTRIN_H)) && (defined(SIMDE_X86_AVX512BF16_NATIVE) || !defined(HEDLEY_INTEL_VERSION)) - typedef __m128bh simde__m128bh; - typedef __m256bh simde__m256bh; - typedef __m512bh simde__m512bh; -#else - #if defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m128bh SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - typedef simde_float32 simde__m256bh SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - typedef simde_float32 simde__m512bh SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - typedef simde__m128bh_private simde__m128bh; - typedef simde__m256bh_private simde__m256bh; - typedef simde__m512bh_private simde__m512bh; - #endif -#endif - -#if defined(SIMDE_X86_AVX512FP16_NATIVE) - typedef __m512h simde__m512h; -#else - #if defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_FLOAT16_VECTOR) - typedef simde_float16 simde__m512h SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - typedef simde__m512h_private simde__m512h; - #endif -#endif - -/* These are really part of AVX-512VL / AVX-512BW (in GCC __mmask32 is - * in avx512vlintrin.h and __mmask64 is in avx512bwintrin.h, in clang - * both are in avx512bwintrin.h), not AVX-512F. However, we don't have - * a good (not-compiler-specific) way to detect if these headers have - * been included. In compilers which support AVX-512F but not - * AVX-512BW/VL (e.g., GCC 4.9) we need typedefs since __mmask{32,64) - * won't exist. - * - * AFAICT __mmask{32,64} are always just typedefs to uint{32,64}_t - * in all compilers, so it's safe to use these instead of typedefs to - * __mmask{16,32}. If you run into a problem with this please file an - * issue and we'll try to figure out a work-around. 
*/ -typedef uint32_t simde__mmask32; -typedef uint64_t simde__mmask64; -#if !defined(__mmask16) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(HEDLEY_INTEL_VERSION) - typedef uint16_t __mmask16; - #else - #define __mmask16 uint16_t; - #endif -#endif -#if !defined(__mmask32) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(HEDLEY_INTEL_VERSION) - typedef uint32_t __mmask32; - #else - #define __mmask32 uint32_t; - #endif -#endif -#if !defined(__mmask64) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(HEDLEY_INTEL_VERSION) - #if defined(HEDLEY_GCC_VERSION) - typedef unsigned long long __mmask64; - #else - typedef uint64_t __mmask64; - #endif - #else - #define __mmask64 uint64_t; - #endif -#endif - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(HEDLEY_INTEL_VERSION) - typedef simde__m512 __m512; - typedef simde__m512i __m512i; - typedef simde__m512d __m512d; - #else - #define __m512 simde__m512 - #define __m512i simde__m512i - #define __m512d simde__m512d - #endif -#endif - -#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(HEDLEY_INTEL_VERSION) - typedef simde__m128bh __m128bh; - typedef simde__m256bh __m256bh; - typedef simde__m512bh __m512bh; - #else - #define __m128bh simde__m128bh - #define __m256bh simde__m256bh - #define __m512bh simde__m512bh - #endif -#endif - -#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(HEDLEY_INTEL_VERSION) - //typedef simde__m128h __m128h; - //typedef simde__m256h __m256h; - typedef simde__m512h __m512h; - #else - //#define __m128h simde__m128h - //#define __m256h simde__m256h - #define __m512h simde__m512h - #endif -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128bh), "simde__m128bh size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128bh_private), "simde__m128bh_private size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256bh), "simde__m256bh size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256bh_private), "simde__m256bh_private size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512bh), "simde__m512bh size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512bh_private), "simde__m512bh_private size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512), "simde__m512 size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512_private), "simde__m512_private size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i), "simde__m512i size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i_private), "simde__m512i_private size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d), "simde__m512d size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d_private), "simde__m512d_private size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512h), "simde__m512h size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512h_private), "simde__m512h_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128bh) == 16, "simde__m128bh is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128bh_private) == 16, "simde__m128bh_private is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256bh) == 32, "simde__m256bh is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256bh_private) == 32, "simde__m256bh_private is not 16-byte aligned"); 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512bh) == 32, "simde__m512bh is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512bh_private) == 32, "simde__m512bh_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512) == 32, "simde__m512 is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512_private) == 32, "simde__m512_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i) == 32, "simde__m512i is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i_private) == 32, "simde__m512i_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d) == 32, "simde__m512d is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d_private) == 32, "simde__m512d_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512h) == 32, "simde__m512h is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512h_private) == 32, "simde__m512h_private is not 32-byte aligned"); -#endif - -#define SIMDE_MM_CMPINT_EQ 0 -#define SIMDE_MM_CMPINT_LT 1 -#define SIMDE_MM_CMPINT_LE 2 -#define SIMDE_MM_CMPINT_FALSE 3 -#define SIMDE_MM_CMPINT_NE 4 -#define SIMDE_MM_CMPINT_NLT 5 -#define SIMDE_MM_CMPINT_NLE 6 -#define SIMDE_MM_CMPINT_TRUE 7 -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && !defined(_MM_CMPINT_EQ) -#define _MM_CMPINT_EQ SIMDE_MM_CMPINT_EQ -#define _MM_CMPINT_LT SIMDE_MM_CMPINT_LT -#define _MM_CMPINT_LE SIMDE_MM_CMPINT_LE -#define _MM_CMPINT_FALSE SIMDE_MM_CMPINT_FALSE -#define _MM_CMPINT_NE SIMDE_MM_CMPINT_NE -#define _MM_CMPINT_NLT SIMDE_MM_CMPINT_NLT -#define _MM_CMPINT_NLE SIMDE_MM_CMPINT_NLE -#define _MM_CMPINT_TRUE SIMDE_CMPINT_TRUE -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128bh -simde__m128bh_from_private(simde__m128bh_private v) { - simde__m128bh r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128bh_private -simde__m128bh_to_private(simde__m128bh v) { - simde__m128bh_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256bh -simde__m256bh_from_private(simde__m256bh_private v) { - simde__m256bh r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256bh_private -simde__m256bh_to_private(simde__m256bh v) { - simde__m256bh_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512bh -simde__m512bh_from_private(simde__m512bh_private v) { - simde__m512bh r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512bh_private -simde__m512bh_to_private(simde__m512bh v) { - simde__m512bh_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde__m512_from_private(simde__m512_private v) { - simde__m512 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512_private -simde__m512_to_private(simde__m512 v) { - simde__m512_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde__m512i_from_private(simde__m512i_private v) { - simde__m512i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i_private -simde__m512i_to_private(simde__m512i v) { - simde__m512i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde__m512d_from_private(simde__m512d_private v) { - simde__m512d r; 
- simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d_private -simde__m512d_to_private(simde__m512d v) { - simde__m512d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde__m512h_from_private(simde__m512h_private v) { - simde__m512h r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h_private -simde__m512h_to_private(simde__m512h v) { - simde__m512h_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_TYPES_H) */ -/* :: End simde/x86/avx512/types.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/load.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_LOAD_H) -#define SIMDE_X86_AVX512_LOAD_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_load_pd (void const * mem_addr) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_load_pd(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d)); - #else - simde__m512d r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_load_pd - #define _mm512_load_pd(a) simde_mm512_load_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_load_ps (void const * mem_addr) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_load_ps(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512)); - #else - simde__m512 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_load_ps - #define _mm512_load_ps(a) simde_mm512_load_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_load_ph (void const * mem_addr) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_load_ph(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512h)); - #else - simde__m512h r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512h), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_load_ph - #define _mm512_load_ph(a) simde_mm512_load_ph(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_load_si512 (void const * mem_addr) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_load_si512(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i)); - #else - simde__m512i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i), sizeof(r)); - return r; - #endif -} -#define simde_mm512_load_epi8(mem_addr) simde_mm512_load_si512(mem_addr) -#define simde_mm512_load_epi16(mem_addr) simde_mm512_load_si512(mem_addr) -#define simde_mm512_load_epi32(mem_addr) simde_mm512_load_si512(mem_addr) -#define simde_mm512_load_epi64(mem_addr) simde_mm512_load_si512(mem_addr) -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_load_epi8 - #undef _mm512_load_epi16 - #undef _mm512_load_epi32 - #undef _mm512_load_epi64 - #undef _mm512_load_si512 - #define _mm512_load_si512(a) simde_mm512_load_si512(a) - #define _mm512_load_epi8(a) simde_mm512_load_si512(a) - #define _mm512_load_epi16(a) simde_mm512_load_si512(a) - #define _mm512_load_epi32(a) simde_mm512_load_si512(a) - #define _mm512_load_epi64(a) simde_mm512_load_si512(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_LOAD_H) */ -/* :: End simde/x86/avx512/load.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set_epi16 (int16_t e31, int16_t e30, int16_t e29, int16_t e28, int16_t e27, int16_t e26, int16_t e25, int16_t e24, - int16_t e23, int16_t e22, int16_t e21, int16_t e20, int16_t e19, int16_t e18, int16_t e17, int16_t e16, - int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - 
simde__m512i_private r_; - - r_.i16[ 0] = e0; - r_.i16[ 1] = e1; - r_.i16[ 2] = e2; - r_.i16[ 3] = e3; - r_.i16[ 4] = e4; - r_.i16[ 5] = e5; - r_.i16[ 6] = e6; - r_.i16[ 7] = e7; - r_.i16[ 8] = e8; - r_.i16[ 9] = e9; - r_.i16[10] = e10; - r_.i16[11] = e11; - r_.i16[12] = e12; - r_.i16[13] = e13; - r_.i16[14] = e14; - r_.i16[15] = e15; - r_.i16[16] = e16; - r_.i16[17] = e17; - r_.i16[18] = e18; - r_.i16[19] = e19; - r_.i16[20] = e20; - r_.i16[21] = e21; - r_.i16[22] = e22; - r_.i16[23] = e23; - r_.i16[24] = e24; - r_.i16[25] = e25; - r_.i16[26] = e26; - r_.i16[27] = e27; - r_.i16[28] = e28; - r_.i16[29] = e29; - r_.i16[30] = e30; - r_.i16[31] = e31; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_epi16 - #define _mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8, - int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - simde__m512i_private r_; - - r_.i32[ 0] = e0; - r_.i32[ 1] = e1; - r_.i32[ 2] = e2; - r_.i32[ 3] = e3; - r_.i32[ 4] = e4; - r_.i32[ 5] = e5; - r_.i32[ 6] = e6; - r_.i32[ 7] = e7; - r_.i32[ 8] = e8; - r_.i32[ 9] = e9; - r_.i32[10] = e10; - r_.i32[11] = e11; - r_.i32[12] = e12; - r_.i32[13] = e13; - r_.i32[14] = e14; - r_.i32[15] = e15; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_epi32 - #define _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - simde__m512i_private r_; - - r_.i64[0] = e0; - r_.i64[1] = e1; - r_.i64[2] = e2; - r_.i64[3] = e3; - r_.i64[4] = e4; - r_.i64[5] = e5; - r_.i64[6] = e6; - r_.i64[7] = e7; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_epi64 - #define _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_epu8 (uint8_t e63, uint8_t e62, uint8_t e61, uint8_t e60, uint8_t e59, uint8_t e58, uint8_t e57, uint8_t e56, - uint8_t e55, uint8_t e54, uint8_t e53, uint8_t e52, uint8_t e51, uint8_t e50, uint8_t e49, uint8_t e48, - uint8_t e47, uint8_t e46, uint8_t e45, uint8_t e44, uint8_t e43, uint8_t e42, uint8_t e41, uint8_t e40, - uint8_t e39, uint8_t e38, uint8_t e37, uint8_t e36, uint8_t e35, uint8_t e34, uint8_t e33, uint8_t e32, - uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, - uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, - uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t 
e0) { - simde__m512i_private r_; - - r_.u8[ 0] = e0; - r_.u8[ 1] = e1; - r_.u8[ 2] = e2; - r_.u8[ 3] = e3; - r_.u8[ 4] = e4; - r_.u8[ 5] = e5; - r_.u8[ 6] = e6; - r_.u8[ 7] = e7; - r_.u8[ 8] = e8; - r_.u8[ 9] = e9; - r_.u8[10] = e10; - r_.u8[11] = e11; - r_.u8[12] = e12; - r_.u8[13] = e13; - r_.u8[14] = e14; - r_.u8[15] = e15; - r_.u8[16] = e16; - r_.u8[17] = e17; - r_.u8[18] = e18; - r_.u8[19] = e19; - r_.u8[20] = e20; - r_.u8[21] = e21; - r_.u8[22] = e22; - r_.u8[23] = e23; - r_.u8[24] = e24; - r_.u8[25] = e25; - r_.u8[26] = e26; - r_.u8[27] = e27; - r_.u8[28] = e28; - r_.u8[29] = e29; - r_.u8[30] = e30; - r_.u8[31] = e31; - r_.u8[32] = e32; - r_.u8[33] = e33; - r_.u8[34] = e34; - r_.u8[35] = e35; - r_.u8[36] = e36; - r_.u8[37] = e37; - r_.u8[38] = e38; - r_.u8[39] = e39; - r_.u8[40] = e40; - r_.u8[41] = e41; - r_.u8[42] = e42; - r_.u8[43] = e43; - r_.u8[44] = e44; - r_.u8[45] = e45; - r_.u8[46] = e46; - r_.u8[47] = e47; - r_.u8[48] = e48; - r_.u8[49] = e49; - r_.u8[50] = e50; - r_.u8[51] = e51; - r_.u8[52] = e52; - r_.u8[53] = e53; - r_.u8[54] = e54; - r_.u8[55] = e55; - r_.u8[56] = e56; - r_.u8[57] = e57; - r_.u8[58] = e58; - r_.u8[59] = e59; - r_.u8[60] = e60; - r_.u8[61] = e61; - r_.u8[62] = e62; - r_.u8[63] = e63; - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_epu16 (uint16_t e31, uint16_t e30, uint16_t e29, uint16_t e28, uint16_t e27, uint16_t e26, uint16_t e25, uint16_t e24, - uint16_t e23, uint16_t e22, uint16_t e21, uint16_t e20, uint16_t e19, uint16_t e18, uint16_t e17, uint16_t e16, - uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, - uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m512i_private r_; - - r_.u16[ 0] = e0; - r_.u16[ 1] = e1; - r_.u16[ 2] = e2; - r_.u16[ 3] = e3; - r_.u16[ 4] = e4; - r_.u16[ 5] = e5; - r_.u16[ 6] = e6; - r_.u16[ 7] = e7; - r_.u16[ 8] = e8; - r_.u16[ 9] = e9; - r_.u16[10] = e10; - r_.u16[11] = e11; - r_.u16[12] = e12; - r_.u16[13] = e13; - r_.u16[14] = e14; - r_.u16[15] = e15; - r_.u16[16] = e16; - r_.u16[17] = e17; - r_.u16[18] = e18; - r_.u16[19] = e19; - r_.u16[20] = e20; - r_.u16[21] = e21; - r_.u16[22] = e22; - r_.u16[23] = e23; - r_.u16[24] = e24; - r_.u16[25] = e25; - r_.u16[26] = e26; - r_.u16[27] = e27; - r_.u16[28] = e28; - r_.u16[29] = e29; - r_.u16[30] = e30; - r_.u16[31] = e31; - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_epu32 (uint32_t e15, uint32_t e14, uint32_t e13, uint32_t e12, uint32_t e11, uint32_t e10, uint32_t e9, uint32_t e8, - uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - simde__m512i_private r_; - - r_.u32[ 0] = e0; - r_.u32[ 1] = e1; - r_.u32[ 2] = e2; - r_.u32[ 3] = e3; - r_.u32[ 4] = e4; - r_.u32[ 5] = e5; - r_.u32[ 6] = e6; - r_.u32[ 7] = e7; - r_.u32[ 8] = e8; - r_.u32[ 9] = e9; - r_.u32[10] = e10; - r_.u32[11] = e11; - r_.u32[12] = e12; - r_.u32[13] = e13; - r_.u32[14] = e14; - r_.u32[15] = e15; - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_epu64 (uint64_t e7, uint64_t e6, uint64_t e5, uint64_t e4, uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { - simde__m512i_private r_; - - r_.u64[ 0] = e0; - r_.u64[ 1] = e1; - r_.u64[ 2] = e2; - r_.u64[ 3] = e3; - r_.u64[ 4] = e4; - r_.u64[ 5] = e5; - r_.u64[ 6] = e6; - r_.u64[ 7] = e7; - - return 
simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set_epi8 (int8_t e63, int8_t e62, int8_t e61, int8_t e60, int8_t e59, int8_t e58, int8_t e57, int8_t e56, - int8_t e55, int8_t e54, int8_t e53, int8_t e52, int8_t e51, int8_t e50, int8_t e49, int8_t e48, - int8_t e47, int8_t e46, int8_t e45, int8_t e44, int8_t e43, int8_t e42, int8_t e41, int8_t e40, - int8_t e39, int8_t e38, int8_t e37, int8_t e36, int8_t e35, int8_t e34, int8_t e33, int8_t e32, - int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && (HEDLEY_GCC_VERSION_CHECK(10,0,0) || SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0)) - return _mm512_set_epi8( - e63, e62, e61, e60, e59, e58, e57, e56, - e55, e54, e53, e52, e51, e50, e49, e48, - e47, e46, e45, e44, e43, e42, e41, e40, - e39, e38, e37, e36, e35, e34, e33, e32, - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0 - ); - #else - simde__m512i_private r_; - - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - r_.i8[16] = e16; - r_.i8[17] = e17; - r_.i8[18] = e18; - r_.i8[19] = e19; - r_.i8[20] = e20; - r_.i8[21] = e21; - r_.i8[22] = e22; - r_.i8[23] = e23; - r_.i8[24] = e24; - r_.i8[25] = e25; - r_.i8[26] = e26; - r_.i8[27] = e27; - r_.i8[28] = e28; - r_.i8[29] = e29; - r_.i8[30] = e30; - r_.i8[31] = e31; - r_.i8[32] = e32; - r_.i8[33] = e33; - r_.i8[34] = e34; - r_.i8[35] = e35; - r_.i8[36] = e36; - r_.i8[37] = e37; - r_.i8[38] = e38; - r_.i8[39] = e39; - r_.i8[40] = e40; - r_.i8[41] = e41; - r_.i8[42] = e42; - r_.i8[43] = e43; - r_.i8[44] = e44; - r_.i8[45] = e45; - r_.i8[46] = e46; - r_.i8[47] = e47; - r_.i8[48] = e48; - r_.i8[49] = e49; - r_.i8[50] = e50; - r_.i8[51] = e51; - r_.i8[52] = e52; - r_.i8[53] = e53; - r_.i8[54] = e54; - r_.i8[55] = e55; - r_.i8[56] = e56; - r_.i8[57] = e57; - r_.i8[58] = e58; - r_.i8[59] = e59; - r_.i8[60] = e60; - r_.i8[61] = e61; - r_.i8[62] = e62; - r_.i8[63] = e63; - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_epi8 - #define _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_m128i (simde__m128i a, simde__m128i b, simde__m128i c, simde__m128i d) { - #if 
defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_ALIGN_TO_64 simde__m128i v[] = { d, c, b, a }; - return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); - #else - simde__m512i_private r_; - - r_.m128i[0] = d; - r_.m128i[1] = c; - r_.m128i[2] = b; - r_.m128i[3] = a; - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_x_mm512_set_m256 (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_ALIGN_TO_64 simde__m256 v[] = { b, a }; - return simde_mm512_load_ps(HEDLEY_STATIC_CAST(__m512 *, HEDLEY_STATIC_CAST(void *, v))); - #else - simde__m512_private r_; - - r_.m256[0] = b; - r_.m256[1] = a; - - return simde__m512_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_m256i (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_ALIGN_TO_64 simde__m256i v[] = { b, a }; - return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); - #else - simde__m512i_private r_; - - r_.m256i[0] = b; - r_.m256i[1] = a; - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_x_mm512_set_m256d (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_ALIGN_TO_64 simde__m256d v[] = { b, a }; - return simde_mm512_load_pd(HEDLEY_STATIC_CAST(__m512d *, HEDLEY_STATIC_CAST(void *, v))); - #else - simde__m512d_private r_; - - r_.m256d[0] = b; - r_.m256d[1] = a; - - return simde__m512d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_set_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12, - simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8, - simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - simde__m512_private r_; - - r_.f32[ 0] = e0; - r_.f32[ 1] = e1; - r_.f32[ 2] = e2; - r_.f32[ 3] = e3; - r_.f32[ 4] = e4; - r_.f32[ 5] = e5; - r_.f32[ 6] = e6; - r_.f32[ 7] = e7; - r_.f32[ 8] = e8; - r_.f32[ 9] = e9; - r_.f32[10] = e10; - r_.f32[11] = e11; - r_.f32[12] = e12; - r_.f32[13] = e13; - r_.f32[14] = e14; - r_.f32[15] = e15; - - return simde__m512_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_ps - #define _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_set_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - simde__m512d_private r_; - - r_.f64[0] = e0; - r_.f64[1] = e1; - r_.f64[2] = e2; - r_.f64[3] = e3; - r_.f64[4] = e4; - r_.f64[5] = e5; - r_.f64[6] = e6; - r_.f64[7] = e7; - - return simde__m512d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_pd - #define _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_set_ph (simde_float16 e31, simde_float16 e30, simde_float16 e29, simde_float16 e28, simde_float16 e27, simde_float16 e26, simde_float16 e25, simde_float16 e24, - simde_float16 e23, simde_float16 e22, simde_float16 e21, simde_float16 e20, simde_float16 e19, simde_float16 e18, 
simde_float16 e17, simde_float16 e16, - simde_float16 e15, simde_float16 e14, simde_float16 e13, simde_float16 e12, simde_float16 e11, simde_float16 e10, simde_float16 e9, simde_float16 e8, - simde_float16 e7, simde_float16 e6, simde_float16 e5, simde_float16 e4, simde_float16 e3, simde_float16 e2, simde_float16 e1, simde_float16 e0) { - simde__m512h_private r_; - - r_.f16[0] = e0; - r_.f16[1] = e1; - r_.f16[2] = e2; - r_.f16[3] = e3; - r_.f16[4] = e4; - r_.f16[5] = e5; - r_.f16[6] = e6; - r_.f16[7] = e7; - r_.f16[8] = e8; - r_.f16[9] = e9; - r_.f16[10] = e10; - r_.f16[11] = e11; - r_.f16[12] = e12; - r_.f16[13] = e13; - r_.f16[14] = e14; - r_.f16[15] = e15; - r_.f16[16] = e16; - r_.f16[17] = e17; - r_.f16[18] = e18; - r_.f16[19] = e19; - r_.f16[20] = e20; - r_.f16[21] = e21; - r_.f16[22] = e22; - r_.f16[23] = e23; - r_.f16[24] = e24; - r_.f16[25] = e25; - r_.f16[26] = e26; - r_.f16[27] = e27; - r_.f16[28] = e28; - r_.f16[29] = e29; - r_.f16[30] = e30; - r_.f16[31] = e31; - - return simde__m512h_from_private(r_); -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_ph - #define _mm512_set_ph(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm512_set_ph(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SET_H) */ -/* :: End simde/x86/avx512/set.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/setzero.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_SETZERO_H) -#define SIMDE_X86_AVX512_SETZERO_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/cast.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_CAST_H) -#define SIMDE_X86_AVX512_CAST_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_castpd_ps (simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd_ps(a); - #else - simde__m512 r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd_ps - #define _mm512_castpd_ps(a) simde_mm512_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_castpd_si512 (simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd_si512(a); - #else - simde__m512i r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd_si512 - #define _mm512_castpd_si512(a) simde_mm512_castpd_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_castps_pd (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps_pd(a); - #else - simde__m512d r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps_pd - #define _mm512_castps_pd(a) simde_mm512_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_castps_si512 (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps_si512(a); - #else - simde__m512i r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps_si512 - #define _mm512_castps_si512(a) simde_mm512_castps_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m512i -simde_mm512_castph_si512 (simde__m512h a) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_castph_si512(a); - #else - simde__m512i r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_castph_si512 - #define _mm512_castph_si512(a) simde_mm512_castph_si512(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_castsi512_ph (simde__m512i a) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_castsi512_ph(a); - #else - simde__m512h r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_ph - #define _mm512_castsi512_ph(a) simde_mm512_castsi512_ph(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_castsi512_ps (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi512_ps(a); - #else - simde__m512 r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_ps - #define _mm512_castsi512_ps(a) simde_mm512_castsi512_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_castsi512_pd (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi512_pd(a); - #else - simde__m512d r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_pd - #define _mm512_castsi512_pd(a) simde_mm512_castsi512_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_castpd128_pd512 (simde__m128d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd128_pd512(a); - #else - simde__m512d_private r_; - r_.m128d[0] = a; - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd128_pd512 - #define _mm512_castpd128_pd512(a) simde_mm512_castpd128_pd512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_castpd256_pd512 (simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd256_pd512(a); - #else - simde__m512d_private r_; - r_.m256d[0] = a; - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd256_pd512 - #define _mm512_castpd256_pd512(a) simde_mm512_castpd256_pd512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm512_castpd512_pd128 (simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd512_pd128(a); - #else - simde__m512d_private a_ = simde__m512d_to_private(a); - return a_.m128d[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd512_pd128 - #define _mm512_castpd512_pd128(a) simde_mm512_castpd512_pd128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm512_castpd512_pd256 (simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd512_pd256(a); - #else - simde__m512d_private a_ = simde__m512d_to_private(a); - return a_.m256d[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd512_pd256 - #define _mm512_castpd512_pd256(a) simde_mm512_castpd512_pd256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_castps128_ps512 (simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps128_ps512(a); - #else - simde__m512_private r_; - r_.m128[0] = a; - return simde__m512_from_private(r_); - 
#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps128_ps512 - #define _mm512_castps128_ps512(a) simde_mm512_castps128_ps512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_castps256_ps512 (simde__m256 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps256_ps512(a); - #else - simde__m512_private r_; - r_.m256[0] = a; - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps256_ps512 - #define _mm512_castps256_ps512(a) simde_mm512_castps256_ps512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm512_castps512_ps128 (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps512_ps128(a); - #else - simde__m512_private a_ = simde__m512_to_private(a); - return a_.m128[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps512_ps128 - #define _mm512_castps512_ps128(a) simde_mm512_castps512_ps128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm512_castps512_ps256 (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps512_ps256(a); - #else - simde__m512_private a_ = simde__m512_to_private(a); - return a_.m256[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps512_ps256 - #define _mm512_castps512_ps256(a) simde_mm512_castps512_ps256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_castsi128_si512 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi128_si512(a); - #else - simde__m512i_private r_; - r_.m128i[0] = a; - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi128_si512 - #define _mm512_castsi128_si512(a) simde_mm512_castsi128_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_castsi256_si512 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi256_si512(a); - #else - simde__m512i_private r_; - r_.m256i[0] = a; - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi256_si512 - #define _mm512_castsi256_si512(a) simde_mm512_castsi256_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_castsi512_si128 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi512_si128(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - return a_.m128i[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_si128 - #define _mm512_castsi512_si128(a) simde_mm512_castsi512_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_castsi512_si256 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi512_si256(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - return a_.m256i[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_si256 - #define _mm512_castsi512_si256(a) simde_mm512_castsi512_si256(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_CAST_H) */ -/* :: End simde/x86/avx512/cast.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_setzero_si512(void) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_setzero_si512(); - #else - simde__m512i r; 
- simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#define simde_mm512_setzero_epi32() simde_mm512_setzero_si512() -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_si512 - #define _mm512_setzero_si512() simde_mm512_setzero_si512() - #undef _mm512_setzero_epi32 - #define _mm512_setzero_epi32() simde_mm512_setzero_si512() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_setzero_ps(void) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_setzero_ps(); - #else - return simde_mm512_castsi512_ps(simde_mm512_setzero_si512()); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_ps - #define _mm512_setzero_ps() simde_mm512_setzero_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_setzero_pd(void) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_setzero_pd(); - #else - return simde_mm512_castsi512_pd(simde_mm512_setzero_si512()); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_pd - #define _mm512_setzero_pd() simde_mm512_setzero_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_setzero_ph(void) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_setzero_ph(); - #else - return simde_mm512_castsi512_ph(simde_mm512_setzero_si512()); - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_ph - #define _mm512_setzero_ph() simde_mm512_setzero_ph() -#endif - - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SETZERO_H) */ -/* :: End simde/x86/avx512/setzero.h :: */ - -#if !defined(SIMDE_X86_PCLMUL_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) -# define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_x_clmul_u64(uint64_t x, uint64_t y) { - uint64_t x0, x1, x2, x3; - uint64_t y0, y1, y2, y3; - uint64_t z0, z1, z2, z3; - - x0 = x & UINT64_C(0x1111111111111111); - x1 = x & UINT64_C(0x2222222222222222); - x2 = x & UINT64_C(0x4444444444444444); - x3 = x & UINT64_C(0x8888888888888888); - y0 = y & UINT64_C(0x1111111111111111); - y1 = y & UINT64_C(0x2222222222222222); - y2 = y & UINT64_C(0x4444444444444444); - y3 = y & UINT64_C(0x8888888888888888); - - z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1); - z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2); - z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3); - z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0); - - z0 &= UINT64_C(0x1111111111111111); - z1 &= UINT64_C(0x2222222222222222); - z2 &= UINT64_C(0x4444444444444444); - z3 &= UINT64_C(0x8888888888888888); - - return z0 | z1 | z2 | z3; -} - -static uint64_t -simde_x_bitreverse_u64(uint64_t v) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t bytes = vreinterpret_u8_u64(vmov_n_u64(v)); - bytes = vrbit_u8(bytes); - bytes = vrev64_u8(bytes); - return vget_lane_u64(vreinterpret_u64_u8(bytes), 0); - #elif defined(SIMDE_X86_GFNI_NATIVE) - /* I don't think there is (or likely will ever be) a CPU with GFNI - * but not pclmulq, but this may be useful for things other than - * _mm_clmulepi64_si128. 
*/ - __m128i vec = _mm_cvtsi64_si128(HEDLEY_STATIC_CAST(int64_t, v)); - - /* Reverse bits within each byte */ - vec = _mm_gf2p8affine_epi64_epi8(vec, _mm_cvtsi64_si128(HEDLEY_STATIC_CAST(int64_t, UINT64_C(0x8040201008040201))), 0); - - /* Reverse bytes */ - #if defined(SIMDE_X86_SSSE3_NATIVE) - vec = _mm_shuffle_epi8(vec, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); - #else - vec = _mm_or_si128(_mm_slli_epi16(vec, 8), _mm_srli_epi16(vec, 8)); - vec = _mm_shufflelo_epi16(vec, _MM_SHUFFLE(0, 1, 2, 3)); - vec = _mm_shufflehi_epi16(vec, _MM_SHUFFLE(0, 1, 2, 3)); - #endif - - return HEDLEY_STATIC_CAST(uint64_t, _mm_cvtsi128_si64(vec)); - #elif HEDLEY_HAS_BUILTIN(__builtin_bitreverse64) - return __builtin_bitreverse64(v); - #else - v = ((v >> 1) & UINT64_C(0x5555555555555555)) | ((v & UINT64_C(0x5555555555555555)) << 1); - v = ((v >> 2) & UINT64_C(0x3333333333333333)) | ((v & UINT64_C(0x3333333333333333)) << 2); - v = ((v >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F)) | ((v & UINT64_C(0x0F0F0F0F0F0F0F0F)) << 4); - v = ((v >> 8) & UINT64_C(0x00FF00FF00FF00FF)) | ((v & UINT64_C(0x00FF00FF00FF00FF)) << 8); - v = ((v >> 16) & UINT64_C(0x0000FFFF0000FFFF)) | ((v & UINT64_C(0x0000FFFF0000FFFF)) << 16); - return (v >> 32) | (v << 32); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_clmulepi64_si128 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT(imm8) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_GE(128) - #if defined(SIMDE_SHUFFLE_VECTOR_) - switch (imm8 & 0x11) { - case 0x00: - b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 0, 0); - a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 0, 0); - break; - case 0x01: - b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 0, 0); - a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 1, 1); - break; - case 0x10: - b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 1, 1); - a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 0, 0); - break; - case 0x11: - b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 1, 1); - a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 1, 1); - break; - } - #else - { - const uint64_t A = a_.u64[(imm8 ) & 1]; - const uint64_t B = b_.u64[(imm8 >> 4) & 1]; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - a_.u64[i] = A; - b_.u64[i] = B; - } - } - #endif - - simde__m128i_private reversed_; - { - #if defined(SIMDE_SHUFFLE_VECTOR_) - reversed_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, b_.u64, 1, 3); - #else - reversed_.u64[0] = a_.u64[1]; - reversed_.u64[1] = b_.u64[1]; - #endif - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(reversed_.u64) / sizeof(reversed_.u64[0])) ; i++) { - reversed_.u64[i] = simde_x_bitreverse_u64(reversed_.u64[i]); - } - } - - #if defined(SIMDE_SHUFFLE_VECTOR_) - a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, reversed_.u64, 0, 2); - b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, reversed_.u64, 1, 3); - #else - a_.u64[1] = reversed_.u64[0]; - b_.u64[1] = reversed_.u64[1]; - #endif - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(reversed_.u64) / sizeof(reversed_.u64[0])) ; i++) { - r_.u64[i] = simde_x_clmul_u64(a_.u64[i], b_.u64[i]); - } - - r_.u64[1] = simde_x_bitreverse_u64(r_.u64[1]) >> 1; - #else - r_.u64[0] = simde_x_clmul_u64( a_.u64[imm8 & 1], b_.u64[(imm8 >> 4) & 1]); - r_.u64[1] = simde_x_bitreverse_u64(simde_x_clmul_u64(simde_x_bitreverse_u64(a_.u64[imm8 & 1]), 
simde_x_bitreverse_u64(b_.u64[(imm8 >> 4) & 1]))) >> 1; - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_PCLMUL_NATIVE) - #if defined(HEDLEY_MCST_LCC_VERSION) - #define simde_mm_clmulepi64_si128(a, b, imm8) (__extension__ ({ \ - SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ - _mm_clmulepi64_si128((a), (b), (imm8)); \ - SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ - })) - #else - #define simde_mm_clmulepi64_si128(a, b, imm8) _mm_clmulepi64_si128(a, b, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_AES) && !defined(__clang__) - #define simde_mm_clmulepi64_si128(a, b, imm8) \ - simde__m128i_from_neon_u64( \ - vreinterpretq_u64_p128( \ - vmull_p64( \ - vgetq_lane_p64(vreinterpretq_p64_u64(simde__m128i_to_neon_u64(a)), (imm8 ) & 1), \ - vgetq_lane_p64(vreinterpretq_p64_u64(simde__m128i_to_neon_u64(b)), (imm8 >> 4) & 1) \ - ) \ - ) \ - ) -#endif -#if defined(SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES) - #undef _mm_clmulepi64_si128 - #define _mm_clmulepi64_si128(a, b, imm8) simde_mm_clmulepi64_si128(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_clmulepi64_epi128 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT(imm8) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - simde__m128i_private a_lo_, b_lo_, r_lo_, a_hi_, b_hi_, r_hi_; - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) - switch (imm8 & 0x01) { - case 0x00: - a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 0, 2); - break; - case 0x01: - a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 1, 3); - break; - } - switch (imm8 & 0x10) { - case 0x00: - b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 0, 2); - break; - case 0x10: - b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 1, 3); - break; - } - #else - a_lo_.u64[0] = a_.u64[((imm8 >> 0) & 1) + 0]; - a_lo_.u64[1] = a_.u64[((imm8 >> 0) & 1) + 2]; - b_lo_.u64[0] = b_.u64[((imm8 >> 4) & 1) + 0]; - b_lo_.u64[1] = b_.u64[((imm8 >> 4) & 1) + 2]; - #endif - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_hi_.u64) / sizeof(r_hi_.u64[0])) ; i++) { - a_hi_.u64[i] = simde_x_bitreverse_u64(a_lo_.u64[i]); - b_hi_.u64[i] = simde_x_bitreverse_u64(b_lo_.u64[i]); - - r_lo_.u64[i] = simde_x_clmul_u64(a_lo_.u64[i], b_lo_.u64[i]); - r_hi_.u64[i] = simde_x_clmul_u64(a_hi_.u64[i], b_hi_.u64[i]); - - r_hi_.u64[i] = simde_x_bitreverse_u64(r_hi_.u64[i]) >> 1; - } - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) - r_.u64 = __builtin_shufflevector(r_lo_.u64, r_hi_.u64, 0, 2, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_ = simde__m256i_to_private(simde_mm256_set_m128i(simde__m128i_from_private(r_hi_), simde__m128i_from_private(r_lo_))); - r_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 32, r_.u64, r_.u64, 0, 2, 1, 3); - #else - r_.u64[0] = r_lo_.u64[0]; - r_.u64[1] = r_hi_.u64[0]; - r_.u64[2] = r_lo_.u64[1]; - r_.u64[3] = r_hi_.u64[1]; - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_clmulepi64_epi128(a, b, imm8) _mm256_clmulepi64_epi128(a, b, imm8) -#endif -#if defined(SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_clmulepi64_epi128 - #define _mm256_clmulepi64_epi128(a, b, imm8) simde_mm256_clmulepi64_epi128(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_clmulepi64_epi128 (simde__m512i a, 
simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT(imm8) { - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b), - r_; - - #if defined(HEDLEY_MSVC_VERSION) - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - #endif - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - switch (imm8 & 0x11) { - case 0x00: - r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x00); - r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x00); - break; - case 0x01: - r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x01); - r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x01); - break; - case 0x10: - r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x10); - r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x10); - break; - case 0x11: - r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x11); - r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x11); - break; - } - #else - simde__m256i_private a_lo_, b_lo_, r_lo_, a_hi_, b_hi_, r_hi_; - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) - switch (imm8 & 0x01) { - case 0x00: - a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 0, 2, 4, 6); - break; - case 0x01: - a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 1, 3, 5, 7); - break; - } - switch (imm8 & 0x10) { - case 0x00: - b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 0, 2, 4, 6); - break; - case 0x10: - b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 1, 3, 5, 7); - break; - } - #else - a_lo_.u64[0] = a_.u64[((imm8 >> 0) & 1) + 0]; - a_lo_.u64[1] = a_.u64[((imm8 >> 0) & 1) + 2]; - a_lo_.u64[2] = a_.u64[((imm8 >> 0) & 1) + 4]; - a_lo_.u64[3] = a_.u64[((imm8 >> 0) & 1) + 6]; - b_lo_.u64[0] = b_.u64[((imm8 >> 4) & 1) + 0]; - b_lo_.u64[1] = b_.u64[((imm8 >> 4) & 1) + 2]; - b_lo_.u64[2] = b_.u64[((imm8 >> 4) & 1) + 4]; - b_lo_.u64[3] = b_.u64[((imm8 >> 4) & 1) + 6]; - #endif - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_hi_.u64) / sizeof(r_hi_.u64[0])) ; i++) { - a_hi_.u64[i] = simde_x_bitreverse_u64(a_lo_.u64[i]); - b_hi_.u64[i] = simde_x_bitreverse_u64(b_lo_.u64[i]); - - r_lo_.u64[i] = simde_x_clmul_u64(a_lo_.u64[i], b_lo_.u64[i]); - r_hi_.u64[i] = simde_x_clmul_u64(a_hi_.u64[i], b_hi_.u64[i]); - - r_hi_.u64[i] = simde_x_bitreverse_u64(r_hi_.u64[i]) >> 1; - } - - #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) - r_.u64 = __builtin_shufflevector(r_lo_.u64, r_hi_.u64, 0, 4, 1, 5, 2, 6, 3, 7); - #else - r_.u64[0] = r_lo_.u64[0]; - r_.u64[1] = r_hi_.u64[0]; - r_.u64[2] = r_lo_.u64[1]; - r_.u64[3] = r_hi_.u64[1]; - r_.u64[4] = r_lo_.u64[2]; - r_.u64[5] = r_hi_.u64[2]; - r_.u64[6] = r_lo_.u64[3]; - r_.u64[7] = r_hi_.u64[3]; - #endif - #endif - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_clmulepi64_epi128(a, b, imm8) _mm512_clmulepi64_epi128(a, b, imm8) -#endif -#if defined(SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_clmulepi64_epi128 - #define _mm512_clmulepi64_epi128(a, b, imm8) simde_mm512_clmulepi64_epi128(a, b, imm8) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_CLMUL_H) */ -/* :: End simde/x86/clmul.h :: */ diff --git a/src/simde/x86/f16c.h b/src/simde/x86/f16c.h deleted file mode 100644 index 951df4f48..000000000 --- 
+++ /dev/null
@@ -1,34598 +0,0 @@
-/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
-/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */
-/* :: Begin simde/x86/f16c.h :: */
-[... deleted body of the auto-generated 34,598-line SIMDE f16c.h single-header bundle (MIT license text plus the inlined simde-common.h and hedley.h compiler-detection, diagnostic, and attribute macros) not reproduced ...]
HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(HEDLEY_IS_CONSTANT) -# undef HEDLEY_IS_CONSTANT -#endif -#if defined(HEDLEY_REQUIRE_CONSTEXPR) -# undef HEDLEY_REQUIRE_CONSTEXPR -#endif -/* HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_IS_CONSTEXPR_) -# undef HEDLEY_IS_CONSTEXPR_ -#endif -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -# if \ - HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) -# endif -# elif \ - ( \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(HEDLEY_SUNPRO_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION)) || \ - (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) -# endif -# elif \ - defined(HEDLEY_GCC_VERSION) || \ - defined(HEDLEY_INTEL_VERSION) || \ - defined(HEDLEY_TINYC_VERSION) || \ - defined(HEDLEY_TI_ARMCL_VERSION) || \ - HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ - defined(HEDLEY_TI_CL2000_VERSION) || \ - defined(HEDLEY_TI_CL6X_VERSION) || \ - defined(HEDLEY_TI_CL7X_VERSION) || \ - defined(HEDLEY_TI_CLPRU_VERSION) || \ - defined(__clang__) -# define HEDLEY_IS_CONSTEXPR_(expr) ( \ - sizeof(void) != \ - sizeof(*( \ - 1 ? \ - ((void*) ((expr) * 0L) ) : \ - ((struct { char v[sizeof(void) * 2]; } *) 1) \ - ) \ - ) \ - ) -# endif -#endif -#if defined(HEDLEY_IS_CONSTEXPR_) -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) -#else -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) (0) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(HEDLEY_BEGIN_C_DECLS) -# undef HEDLEY_BEGIN_C_DECLS -#endif -#if defined(HEDLEY_END_C_DECLS) -# undef HEDLEY_END_C_DECLS -#endif -#if defined(HEDLEY_C_DECL) -# undef HEDLEY_C_DECL -#endif -#if defined(__cplusplus) -# define HEDLEY_BEGIN_C_DECLS extern "C" { -# define HEDLEY_END_C_DECLS } -# define HEDLEY_C_DECL extern "C" -#else -# define HEDLEY_BEGIN_C_DECLS -# define HEDLEY_END_C_DECLS -# define HEDLEY_C_DECL -#endif - -#if defined(HEDLEY_STATIC_ASSERT) -# undef HEDLEY_STATIC_ASSERT -#endif -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) -# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#else -# define HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(HEDLEY_NULL) -# undef HEDLEY_NULL -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) -# elif defined(NULL) -# define HEDLEY_NULL NULL -# else -# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) -# endif -#elif defined(NULL) -# define HEDLEY_NULL NULL -#else -# define HEDLEY_NULL ((void*) 0) -#endif - -#if defined(HEDLEY_MESSAGE) -# undef HEDLEY_MESSAGE -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_MESSAGE(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(message msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_WARNING) -# undef HEDLEY_WARNING -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_WARNING(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(clang warning msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_REQUIRE) -# undef HEDLEY_REQUIRE -#endif -#if defined(HEDLEY_REQUIRE_MSG) -# undef HEDLEY_REQUIRE_MSG -#endif -#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) -# if HEDLEY_HAS_WARNING("-Wgcc-compat") -# define HEDLEY_REQUIRE(expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# define HEDLEY_REQUIRE_MSG(expr,msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) -# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) -# endif -#else -# define HEDLEY_REQUIRE(expr) -# define HEDLEY_REQUIRE_MSG(expr,msg) -#endif - -#if defined(HEDLEY_FLAGS) -# undef HEDLEY_FLAGS -#endif -#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) -# define HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else -# define HEDLEY_FLAGS -#endif - -#if defined(HEDLEY_FLAGS_CAST) -# undef HEDLEY_FLAGS_CAST -#endif -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) -# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)") \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(HEDLEY_EMPTY_BASES) -# undef HEDLEY_EMPTY_BASES -#endif -#if \ - (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else -# define HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. */ - -#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) -# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) -#else -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_BUILTIN) -# undef HEDLEY_CLANG_HAS_BUILTIN -#endif -#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) - -#if defined(HEDLEY_CLANG_HAS_FEATURE) -# undef HEDLEY_CLANG_HAS_FEATURE -#endif -#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) - -#if defined(HEDLEY_CLANG_HAS_EXTENSION) -# undef HEDLEY_CLANG_HAS_EXTENSION -#endif -#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) - -#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_WARNING) -# undef HEDLEY_CLANG_HAS_WARNING -#endif -#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ -/* :: End simde/hedley.h :: */ - -#define SIMDE_VERSION_MAJOR 0 -#define SIMDE_VERSION_MINOR 8 -#define SIMDE_VERSION_MICRO 0 -#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) -// Also update meson.build in the root directory of the repository - -#include -#include - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin 
simde/simde-detect-clang.h :: */ -/* Detect Clang Version - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -/* This file was originally part of SIMDe - * (). You're free to do with it as - * you please, but I do have a few small requests: - * - * * If you make improvements, please submit them back to SIMDe - * (at ) so others can - * benefit from them. - * * Please keep a link to SIMDe intact so people know where to submit - * improvements. - * * If you expose it publicly, please change the SIMDE_ prefix to - * something specific to your project. - * - * The version numbers clang exposes (in the ___clang_major__, - * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. - * Vendors such as Apple will define these values to their version - * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but - * __clang_major__ and __clang_minor__ are defined to 4 and 0 - * respectively, instead of 3 and 1. - * - * The solution is *usually* to use clang's feature detection macros - * () - * to determine if the feature you're interested in is available. This - * generally works well, and it should probably be the first thing you - * try. Unfortunately, it's not possible to check for everything. In - * particular, compiler bugs. - * - * This file just uses the feature checking macros to detect features - * added in specific versions of clang to identify which version of - * clang the compiler is based on. - * - * Right now it only goes back to 3.6, but I'm happy to accept patches - * to go back further. And, of course, newer versions are welcome if - * they're not already present, and if you find a way to detect a point - * release that would be great, too! - */ - -#if !defined(SIMDE_DETECT_CLANG_H) -#define SIMDE_DETECT_CLANG_H 1 - -/* Attempt to detect the upstream clang version number. I usually only - * worry about major version numbers (at least for 4.0+), but if you - * need more resolution I'm happy to accept patches that are able to - * detect minor versions as well. That said, you'll probably have a - * hard time with detection since AFAIK most minor releases don't add - * anything we can detect. Updated based on - * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 - * - would welcome patches/updates there as well. 
- */ - -#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -# if __has_attribute(unsafe_buffer_usage) // no new warnings in 17.0 -# define SIMDE_DETECT_CLANG_VERSION 170000 -# elif __has_attribute(nouwtable) // no new warnings in 16.0 -# define SIMDE_DETECT_CLANG_VERSION 160000 -# elif __has_warning("-Warray-parameter") -# define SIMDE_DETECT_CLANG_VERSION 150000 -# elif __has_warning("-Wbitwise-instead-of-logical") -# define SIMDE_DETECT_CLANG_VERSION 140000 -# elif __has_warning("-Waix-compat") -# define SIMDE_DETECT_CLANG_VERSION 130000 -# elif __has_warning("-Wformat-insufficient-args") -# define SIMDE_DETECT_CLANG_VERSION 120000 -# elif __has_warning("-Wimplicit-const-int-float-conversion") -# define SIMDE_DETECT_CLANG_VERSION 110000 -# elif __has_warning("-Wmisleading-indentation") -# define SIMDE_DETECT_CLANG_VERSION 100000 -# elif defined(__FILE_NAME__) -# define SIMDE_DETECT_CLANG_VERSION 90000 -# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) -# define SIMDE_DETECT_CLANG_VERSION 80000 -// For reasons unknown, Xcode 10.3 (Apple LLVM version 10.0.1) is apparently -// based on Clang 7, but does not support the warning we test. -// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and -// https://trac.macports.org/wiki/XcodeVersionInfo. -# elif __has_warning("-Wc++98-compat-extra-semi") || \ - (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) -# define SIMDE_DETECT_CLANG_VERSION 70000 -# elif __has_warning("-Wpragma-pack") -# define SIMDE_DETECT_CLANG_VERSION 60000 -# elif __has_warning("-Wbitfield-enum-conversion") -# define SIMDE_DETECT_CLANG_VERSION 50000 -# elif __has_attribute(diagnose_if) -# define SIMDE_DETECT_CLANG_VERSION 40000 -# elif __has_warning("-Wcomma") -# define SIMDE_DETECT_CLANG_VERSION 39000 -# elif __has_warning("-Wdouble-promotion") -# define SIMDE_DETECT_CLANG_VERSION 38000 -# elif __has_warning("-Wshift-negative-value") -# define SIMDE_DETECT_CLANG_VERSION 37000 -# elif __has_warning("-Wambiguous-ellipsis") -# define SIMDE_DETECT_CLANG_VERSION 36000 -# else -# define SIMDE_DETECT_CLANG_VERSION 1 -# endif -#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ - -/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty - * straightforward; it returns true if the compiler is a derivative - * of clang >= the specified version. - * - * Since this file is often (primarily?) useful for working around bugs - * it is also helpful to have a macro which returns true if only if the - * compiler is a version of clang *older* than the specified version to - * make it a bit easier to ifdef regions to add code for older versions, - * such as pragmas to disable a specific warning. 
*/ - -#if defined(SIMDE_DETECT_CLANG_VERSION) -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) -#else -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) -#endif - -#endif /* !defined(SIMDE_DETECT_CLANG_H) */ -/* :: End simde/simde-detect-clang.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-arch.h :: */ -/* Architecture detection - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - * Different compilers define different preprocessor macros for the - * same architecture. This is an attempt to provide a single - * interface which is usable on any compiler. - * - * In general, a macro named SIMDE_ARCH_* is defined for each - * architecture the CPU supports. When there are multiple possible - * versions, we try to define the macro to the target version. For - * example, if you want to check for i586+, you could do something - * like: - * - * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) - * ... - * #endif - * - * You could also just check that SIMDE_ARCH_X86 >= 5 without checking - * if it's defined first, but some compilers may emit a warning about - * an undefined macro being used (e.g., GCC with -Wundef). - * - * This was originally created for SIMDe - * (hence the prefix), but this - * header has no dependencies and may be used anywhere. It is - * originally based on information from - * , though it - * has been enhanced with additional information. - * - * If you improve this file, or find a bug, please file the issue at - * . If you copy this into - * your project, even if you change the prefix, please keep the links - * to SIMDe intact so others know where to report issues, submit - * enhancements, and find the latest version. 
*/ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -# if defined(__alpha_ev6__) -# define SIMDE_ARCH_ALPHA 6 -# elif defined(__alpha_ev5__) -# define SIMDE_ARCH_ALPHA 5 -# elif defined(__alpha_ev4__) -# define SIMDE_ARCH_ALPHA 4 -# else -# define SIMDE_ARCH_ALPHA 1 -# endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -# define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -# define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -# if !defined(_M_ARM64EC) -# define SIMDE_ARCH_AMD64 1000 -# endif -#endif - -/* ARM - */ -#if defined(__ARM_ARCH) -# if __ARM_ARCH > 100 -# define SIMDE_ARCH_ARM (__ARM_ARCH) -# else -# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) -# endif -#elif defined(_M_ARM) -# if _M_ARM > 100 -# define SIMDE_ARCH_ARM (_M_ARM) -# else -# define SIMDE_ARCH_ARM (_M_ARM * 100) -# endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_ARM 800 -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -# define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) -#else -# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -# define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -# elif defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -# endif -#endif -#if defined(__ARM_FEATURE_SVE) -# define SIMDE_ARCH_ARM_SVE -#endif -#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA -# define SIMDE_ARCH_ARM_FMA -#endif -#if defined(__ARM_FEATURE_CRYPTO) -# define SIMDE_ARCH_ARM_CRYPTO -#endif -#if defined(__ARM_FEATURE_QRDMX) -# define SIMDE_ARCH_ARM_QRDMX -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -# define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -# define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) -# define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -# define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -# define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -# define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -# define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -# define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -# define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -# define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -# define 
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
*/ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. 
*/ -#if \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ -#endif - -/* This warning needs a lot of work. It is triggered if all you do is - * pass the value to memcpy/__builtin_memcpy, or if you initialize a - * member of the union, even if that member takes up the entire union. - * Last tested with clang-10, hopefully things will improve in the - * future; if clang fixes this I'd love to enable it. */ -#if \ - HEDLEY_HAS_WARNING("-Wconditional-uninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ -#endif - -/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which - * will is false. However, SIMDe uses these operations exclusively - * for things like _mm_cmpeq_ps, for which we really do want to check - * for equality (or inequality). - * - * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro - * which just wraps a check in some code do disable this diagnostic I'd - * be happy to accept it. */ -#if \ - HEDLEY_HAS_WARNING("-Wfloat-equal") || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ -#endif - -/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. - * If Hedley can't find an implementation it will preprocess to - * nothing, which means there will be a trailing semi-colon. */ -#if HEDLEY_HAS_WARNING("-Wextra-semi") - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") -#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ -#endif - -/* We do use a few variadic macros, which technically aren't available - * until C99 and C++11, but every compiler I'm aware of has supported - * them for much longer. That said, usage is isolated to the test - * suite and compilers known to support them. */ -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) - #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#endif - -/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro - * before we can access certain SIMD intrinsics, but this diagnostic - * warns about it being a reserved name. It is a reserved name, but - * it's reserved for the compiler and we are using it to convey - * information to the compiler. - * - * This is also used when enabling native aliases since we don't get to - * choose the macro names. 
[Vendored SIMDe header removed (auto-generated amalgamation 589c7d599ae2213823acc4334a3ae8ef8caefe18); the deleted content is elided here and the deletion continues below. This span covered the remainder of the simde-diagnostic.h section (per-compiler warning-suppression macros and the SIMDE_DISABLE_UNWANTED_DIAGNOSTICS aggregate), the entire simde-features.h section (native-ISA detection and implication chains for x86 MMX/SSE/AVX/AVX-512/FMA/XOP/AES, ARM NEON/SVE, WASM SIMD128, POWER AltiVec, z/Arch z/Vector, MIPS MSA/Loongson MMI, and LoongArch LSX/LASX, plus the SIMDE_NATURAL_VECTOR_SIZE and SIMDE_*_ENABLE_NATIVE_ALIASES definitions), and the opening of the simde-math.h section (MIT license header, optional SLEEF hooks, floating-point classification helpers, and the simde_math_* wrappers that dispatch to compiler builtins, <cmath>, or <math.h>).]
!defined(simde_math_fma) - #if SIMDE_MATH_BUILTIN_LIBM(fma) - #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fma(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fma(x, y, z) fma(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmaf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaf) - #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaf(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaf(x, y, z) fmaf(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmax) - #if SIMDE_MATH_BUILTIN_LIBM(fmax) - #define simde_math_fmax(x, y) __builtin_fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmax(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmax(x, y) fmax(x, y) - #endif -#endif - -#if !defined(simde_math_fmaxf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) - #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaxf(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaxf(x, y) fmaxf(x, y) - #endif -#endif - -#if !defined(simde_math_hypot) - #if SIMDE_MATH_BUILTIN_LIBM(hypot) - #define simde_math_hypot(y, x) __builtin_hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypot(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypot(y, x) hypot(y, x) - #endif -#endif - -#if !defined(simde_math_hypotf) - #if SIMDE_MATH_BUILTIN_LIBM(hypotf) - #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypotf(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypotf(y, x) hypotf(y, x) - #endif -#endif - -#if !defined(simde_math_log) - #if SIMDE_MATH_BUILTIN_LIBM(log) - #define simde_math_log(v) __builtin_log(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log(v) log(v) - #endif -#endif - -#if !defined(simde_math_logf) - #if SIMDE_MATH_BUILTIN_LIBM(logf) - #define simde_math_logf(v) __builtin_logf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logf(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logf(v) logf(v) - #endif -#endif - -#if !defined(simde_math_logb) - #if SIMDE_MATH_BUILTIN_LIBM(logb) - #define simde_math_logb(v) __builtin_logb(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logb(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logb(v) logb(v) - #endif -#endif - -#if !defined(simde_math_logbf) - #if SIMDE_MATH_BUILTIN_LIBM(logbf) - #define simde_math_logbf(v) __builtin_logbf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logbf(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logbf(v) logbf(v) - #endif -#endif - -#if !defined(simde_math_log1p) - #if SIMDE_MATH_BUILTIN_LIBM(log1p) - #define simde_math_log1p(v) __builtin_log1p(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log1p(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1p(v) log1p(v) - #endif -#endif - -#if !defined(simde_math_log1pf) - #if SIMDE_MATH_BUILTIN_LIBM(log1pf) - #define simde_math_log1pf(v) __builtin_log1pf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define 
simde_math_log1pf(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1pf(v) log1pf(v) - #endif -#endif - -#if !defined(simde_math_log2) - #if SIMDE_MATH_BUILTIN_LIBM(log2) - #define simde_math_log2(v) __builtin_log2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2(v) log2(v) - #endif -#endif - -#if !defined(simde_math_log2f) - #if SIMDE_MATH_BUILTIN_LIBM(log2f) - #define simde_math_log2f(v) __builtin_log2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2f(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2f(v) log2f(v) - #endif -#endif - -#if !defined(simde_math_log10) - #if SIMDE_MATH_BUILTIN_LIBM(log10) - #define simde_math_log10(v) __builtin_log10(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10(v) log10(v) - #endif -#endif - -#if !defined(simde_math_log10f) - #if SIMDE_MATH_BUILTIN_LIBM(log10f) - #define simde_math_log10f(v) __builtin_log10f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10f(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10f(v) log10f(v) - #endif -#endif - -#if !defined(simde_math_modf) - #if SIMDE_MATH_BUILTIN_LIBM(modf) - #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modf(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modf(x, iptr) modf(x, iptr) - #endif -#endif - -#if !defined(simde_math_modff) - #if SIMDE_MATH_BUILTIN_LIBM(modff) - #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modff(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modff(x, iptr) modff(x, iptr) - #endif -#endif - -#if !defined(simde_math_nearbyint) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) - #define simde_math_nearbyint(v) __builtin_nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyint(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyint(v) nearbyint(v) - #endif -#endif - -#if !defined(simde_math_nearbyintf) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) - #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyintf(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyintf(v) nearbyintf(v) - #endif -#endif - -#if !defined(simde_math_pow) - #if SIMDE_MATH_BUILTIN_LIBM(pow) - #define simde_math_pow(y, x) __builtin_pow(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_pow(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_pow(y, x) pow(y, x) - #endif -#endif - -#if !defined(simde_math_powf) - #if SIMDE_MATH_BUILTIN_LIBM(powf) - #define simde_math_powf(y, x) __builtin_powf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_powf(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_powf(y, x) powf(y, x) - #endif -#endif - -#if !defined(simde_math_rint) - #if SIMDE_MATH_BUILTIN_LIBM(rint) - #define simde_math_rint(v) __builtin_rint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rint(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rint(v) rint(v) - #endif 
-#endif - -#if !defined(simde_math_rintf) - #if SIMDE_MATH_BUILTIN_LIBM(rintf) - #define simde_math_rintf(v) __builtin_rintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rintf(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rintf(v) rintf(v) - #endif -#endif - -#if !defined(simde_math_round) - #if SIMDE_MATH_BUILTIN_LIBM(round) - #define simde_math_round(v) __builtin_round(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_round(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_round(v) round(v) - #endif -#endif - -#if !defined(simde_math_roundf) - #if SIMDE_MATH_BUILTIN_LIBM(roundf) - #define simde_math_roundf(v) __builtin_roundf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_roundf(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_roundf(v) roundf(v) - #endif -#endif - -#if !defined(simde_math_roundeven) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundeven(v) __builtin_roundeven(v) - #elif defined(simde_math_round) && defined(simde_math_fabs) - static HEDLEY_INLINE - double - simde_math_roundeven(double v) { - double rounded = simde_math_round(v); - double diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundeven simde_math_roundeven - #endif -#endif - -#if !defined(simde_math_roundevenf) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundevenf(v) __builtin_roundevenf(v) - #elif defined(simde_math_roundf) && defined(simde_math_fabsf) - static HEDLEY_INLINE - float - simde_math_roundevenf(float v) { - float rounded = simde_math_roundf(v); - float diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundevenf simde_math_roundevenf - #endif -#endif - -#if !defined(simde_math_sin) - #if SIMDE_MATH_BUILTIN_LIBM(sin) - #define simde_math_sin(v) __builtin_sin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sin(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sin(v) sin(v) - #endif -#endif - -#if !defined(simde_math_sinf) - #if SIMDE_MATH_BUILTIN_LIBM(sinf) - #define simde_math_sinf(v) __builtin_sinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinf(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinf(v) sinf(v) - #endif -#endif - -#if !defined(simde_math_sinh) - #if SIMDE_MATH_BUILTIN_LIBM(sinh) - #define simde_math_sinh(v) __builtin_sinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinh(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinh(v) sinh(v) - #endif -#endif - -#if !defined(simde_math_sinhf) - #if SIMDE_MATH_BUILTIN_LIBM(sinhf) - #define simde_math_sinhf(v) __builtin_sinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinhf(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinhf(v) sinhf(v) - #endif -#endif - -#if !defined(simde_math_sqrt) - #if SIMDE_MATH_BUILTIN_LIBM(sqrt) - #define simde_math_sqrt(v) __builtin_sqrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrt(v) 
std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrt(v) sqrt(v) - #endif -#endif - -#if !defined(simde_math_sqrtf) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) - #define simde_math_sqrtf(v) __builtin_sqrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtf(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtf(v) sqrtf(v) - #endif -#endif - -#if !defined(simde_math_sqrtl) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtl) - #define simde_math_sqrtl(v) __builtin_sqrtl(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtl(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtl(v) sqrtl(v) - #endif -#endif - -#if !defined(simde_math_tan) - #if SIMDE_MATH_BUILTIN_LIBM(tan) - #define simde_math_tan(v) __builtin_tan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tan(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tan(v) tan(v) - #endif -#endif - -#if !defined(simde_math_tanf) - #if SIMDE_MATH_BUILTIN_LIBM(tanf) - #define simde_math_tanf(v) __builtin_tanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanf(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanf(v) tanf(v) - #endif -#endif - -#if !defined(simde_math_tanh) - #if SIMDE_MATH_BUILTIN_LIBM(tanh) - #define simde_math_tanh(v) __builtin_tanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanh(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanh(v) tanh(v) - #endif -#endif - -#if !defined(simde_math_tanhf) - #if SIMDE_MATH_BUILTIN_LIBM(tanhf) - #define simde_math_tanhf(v) __builtin_tanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanhf(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanhf(v) tanhf(v) - #endif -#endif - -#if !defined(simde_math_trunc) - #if SIMDE_MATH_BUILTIN_LIBM(trunc) - #define simde_math_trunc(v) __builtin_trunc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_trunc(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_trunc(v) trunc(v) - #endif -#endif - -#if !defined(simde_math_truncf) - #if SIMDE_MATH_BUILTIN_LIBM(truncf) - #define simde_math_truncf(v) __builtin_truncf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_truncf(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_truncf(v) truncf(v) - #endif -#endif - -/*** Comparison macros (which don't raise invalid errors) ***/ - -#if defined(isunordered) - #define simde_math_isunordered(x, y) isunordered(x, y) -#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) - #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) -#else - static HEDLEY_INLINE - int simde_math_isunordered(double x, double y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunordered simde_math_isunordered - - static HEDLEY_INLINE - int simde_math_isunorderedf(float x, float y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunorderedf simde_math_isunorderedf -#endif -#if !defined(simde_math_isunorderedf) - #define simde_math_isunorderedf simde_math_isunordered -#endif - -/*** Additional functions not in libm ***/ - -#if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) - static HEDLEY_INLINE - double - simde_math_cdfnorm(double x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const double a1 = 0.254829592; - static const double a2 = -0.284496736; 
- static const double a3 = 1.421413741; - static const double a4 = -1.453152027; - static const double a5 = 1.061405429; - static const double p = 0.3275911; - - const int sign = x < 0; - x = simde_math_fabs(x) / simde_math_sqrt(2.0); - - /* A&S formula 7.1.26 */ - double t = 1.0 / (1.0 + p * x); - double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); - - return 0.5 * (1.0 + (sign ? -y : y)); - } - #define simde_math_cdfnorm simde_math_cdfnorm -#endif - -#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) - static HEDLEY_INLINE - float - simde_math_cdfnormf(float x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const float a1 = 0.254829592f; - static const float a2 = -0.284496736f; - static const float a3 = 1.421413741f; - static const float a4 = -1.453152027f; - static const float a5 = 1.061405429f; - static const float p = 0.3275911f; - - const int sign = x < 0; - x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); - - /* A&S formula 7.1.26 */ - float t = 1.0f / (1.0f + p * x); - float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); - - return 0.5f * (1.0f + (sign ? -y : y)); - } - #define simde_math_cdfnormf simde_math_cdfnormf -#endif - -#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) - /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ - static HEDLEY_INLINE - double - simde_math_cdfnorminv(double p) { - static const double a[6] = { - -3.969683028665376e+01, - 2.209460984245205e+02, - -2.759285104469687e+02, - 1.383577518672690e+02, - -3.066479806614716e+01, - 2.506628277459239e+00 - }; - - static const double b[5] = { - -5.447609879822406e+01, - 1.615858368580409e+02, - -1.556989798598866e+02, - 6.680131188771972e+01, - -1.328068155288572e+01 - }; - - static const double c[6] = { - -7.784894002430293e-03, - -3.223964580411365e-01, - -2.400758277161838e+00, - -2.549732539343734e+00, - 4.374664141464968e+00, - 2.938163982698783e+00 - }; - - static const double d[4] = { - 7.784695709041462e-03, - 3.224671290700398e-01, - 2.445134137142996e+00, - 3.754408661907416e+00 - }; - - static const double low = 0.02425; - static const double high = 0.97575; - double q, r; - - if (p < 0 || p > 1) { - return 0.0; - } else if (p == 0) { - return -SIMDE_MATH_INFINITY; - } else if (p == 1) { - return SIMDE_MATH_INFINITY; - } else if (p < low) { - q = simde_math_sqrt(-2.0 * simde_math_log(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } -} -#define simde_math_cdfnorminv simde_math_cdfnorminv -#endif - -#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_cdfnorminvf(float p) { - static const float a[6] = { - -3.969683028665376e+01f, - 2.209460984245205e+02f, - -2.759285104469687e+02f, - 1.383577518672690e+02f, - -3.066479806614716e+01f, - 
2.506628277459239e+00f - }; - static const float b[5] = { - -5.447609879822406e+01f, - 1.615858368580409e+02f, - -1.556989798598866e+02f, - 6.680131188771972e+01f, - -1.328068155288572e+01f - }; - static const float c[6] = { - -7.784894002430293e-03f, - -3.223964580411365e-01f, - -2.400758277161838e+00f, - -2.549732539343734e+00f, - 4.374664141464968e+00f, - 2.938163982698783e+00f - }; - static const float d[4] = { - 7.784695709041462e-03f, - 3.224671290700398e-01f, - 2.445134137142996e+00f, - 3.754408661907416e+00f - }; - static const float low = 0.02425f; - static const float high = 0.97575f; - float q, r; - - if (p < 0 || p > 1) { - return 0.0f; - } else if (p == 0) { - return -SIMDE_MATH_INFINITYF; - } else if (p == 1) { - return SIMDE_MATH_INFINITYF; - } else if (p < low) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5f; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } - } - #define simde_math_cdfnorminvf simde_math_cdfnorminvf -#endif - -#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfinv(double x) { - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c - * - * The original answer on SO uses a constant of 0.147, but in my - * testing 0.14829094707965850830078125 gives a lower average absolute error - * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). - * That said, if your goal is to minimize the *maximum* absolute - * error, 0.15449436008930206298828125 provides significantly better - * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); - } - #define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfinvf(float x) { - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); - } - #define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfcinv(double x) { - if(x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - static const double p[6] = { - 0.1550470003116, - 1.382719649631, - 0.690969348887, - -1.128081391617, - 0.680544246825, - -0.16444156791 - }; - static const double q[3] = { - 0.155024849822, - 1.385228141995, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - static const double p[4] = { - 0.00980456202915, - 0.363667889171, - 0.97302949837, - -0.5374947401 - }; - static const double q[3] = { - 0.00980451277802, - 0.363699971544, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } - } - - #define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfcinvf(float x) { - if(x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = { - 0.1550470003116f, - 1.382719649631f, - 0.690969348887f, - -1.128081391617f, - 0.680544246825f - -0.164441567910f - }; - static const float q[3] = { - 0.155024849822f, - 1.385228141995f, - 1.000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = { - 0.00980456202915f, - 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f - }; - static const float q[3] = { - 0.00980451277802f, - 0.36369997154400f, - 1.00000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; - } - } - - #define simde_math_erfcinvf simde_math_erfcinvf -#endif - -static HEDLEY_INLINE -double -simde_math_rad2deg(double radians) { - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE -float -simde_math_rad2degf(float radians) { - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE -double -simde_math_deg2rad(double degrees) { - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE -float -simde_math_deg2radf(float degrees) { - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE -int8_t -simde_math_adds_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); - #else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_adds_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_adds_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_adds_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_adds_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); - #else - uint8_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_adds_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); - #else - uint16_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_adds_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); - #else - uint32_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_adds_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); - #else - uint64_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -int8_t -simde_math_subs_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); - #else - uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - case 32: result = func_name(__VA_ARGS__, 32); break; \ - case 33: result = func_name(__VA_ARGS__, 33); break; \ - case 34: result = func_name(__VA_ARGS__, 34); break; \ - case 35: result = func_name(__VA_ARGS__, 35); break; \ - case 36: result = func_name(__VA_ARGS__, 36); break; \ - case 37: result = func_name(__VA_ARGS__, 37); break; \ - case 38: result = func_name(__VA_ARGS__, 38); break; \ - case 39: result = func_name(__VA_ARGS__, 39); break; \ - case 40: result = func_name(__VA_ARGS__, 40); break; \ - case 41: result = func_name(__VA_ARGS__, 41); break; \ - case 42: result = func_name(__VA_ARGS__, 42); break; \ - case 43: result = func_name(__VA_ARGS__, 43); break; \ - case 44: result = func_name(__VA_ARGS__, 44); break; \ - case 45: result = func_name(__VA_ARGS__, 45); break; \ - case 46: result = func_name(__VA_ARGS__, 46); break; \ - case 47: result = func_name(__VA_ARGS__, 47); break; \ - case 48: result = func_name(__VA_ARGS__, 48); break; \ - case 49: result = func_name(__VA_ARGS__, 49); break; \ - case 50: result = func_name(__VA_ARGS__, 50); break; \ - case 51: result = func_name(__VA_ARGS__, 51); break; \ - case 52: result = func_name(__VA_ARGS__, 52); break; \ - case 53: result = func_name(__VA_ARGS__, 53); break; \ - case 54: result = func_name(__VA_ARGS__, 54); break; \ - case 55: result = func_name(__VA_ARGS__, 55); break; \ - case 56: result = func_name(__VA_ARGS__, 56); break; \ - case 57: result = func_name(__VA_ARGS__, 57); break; \ - case 58: result = func_name(__VA_ARGS__, 58); break; \ - case 59: result = func_name(__VA_ARGS__, 59); break; \ - case 60: result = func_name(__VA_ARGS__, 60); break; \ - case 61: result = func_name(__VA_ARGS__, 61); break; \ - case 62: 
result = func_name(__VA_ARGS__, 62); break; \ - case 63: result = func_name(__VA_ARGS__, 63); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - case 32: func_name(__VA_ARGS__, 32); break; \ - case 33: func_name(__VA_ARGS__, 33); break; \ - case 34: func_name(__VA_ARGS__, 34); break; \ - case 35: func_name(__VA_ARGS__, 35); break; \ - case 36: func_name(__VA_ARGS__, 36); break; \ - case 37: func_name(__VA_ARGS__, 37); break; \ - case 38: func_name(__VA_ARGS__, 38); break; \ 
- case 39: func_name(__VA_ARGS__, 39); break; \ - case 40: func_name(__VA_ARGS__, 40); break; \ - case 41: func_name(__VA_ARGS__, 41); break; \ - case 42: func_name(__VA_ARGS__, 42); break; \ - case 43: func_name(__VA_ARGS__, 43); break; \ - case 44: func_name(__VA_ARGS__, 44); break; \ - case 45: func_name(__VA_ARGS__, 45); break; \ - case 46: func_name(__VA_ARGS__, 46); break; \ - case 47: func_name(__VA_ARGS__, 47); break; \ - case 48: func_name(__VA_ARGS__, 48); break; \ - case 49: func_name(__VA_ARGS__, 49); break; \ - case 50: func_name(__VA_ARGS__, 50); break; \ - case 51: func_name(__VA_ARGS__, 51); break; \ - case 52: func_name(__VA_ARGS__, 52); break; \ - case 53: func_name(__VA_ARGS__, 53); break; \ - case 54: func_name(__VA_ARGS__, 54); break; \ - case 55: func_name(__VA_ARGS__, 55); break; \ - case 56: func_name(__VA_ARGS__, 56); break; \ - case 57: func_name(__VA_ARGS__, 57); break; \ - case 58: func_name(__VA_ARGS__, 58); break; \ - case 59: func_name(__VA_ARGS__, 59); break; \ - case 60: func_name(__VA_ARGS__, 60); break; \ - case 61: func_name(__VA_ARGS__, 61); break; \ - case 62: func_name(__VA_ARGS__, 62); break; \ - case 63: func_name(__VA_ARGS__, 63); break; \ - default: default_case; break; \ - } \ - } while (0) - -HEDLEY_DIAGNOSTIC_POP - -#endif -/* :: End simde/simde-constify.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-align.h :: */ -/* Alignment - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - ********************************************************************** - * - * This is portability layer which should help iron out some - * differences across various compilers, as well as various verisons of - * C and C++. - * - * It was originally developed for SIMD Everywhere - * (), but since its only - * dependency is Hedley (, also CC0) - * it can easily be used in other projects, so please feel free to do - * so. - * - * If you do use this in your project, please keep a link to SIMDe in - * your code to remind you where to report any bugs and/or check for - * updated versions. - * - * # API Overview - * - * The API has several parts, and most macros have a few variations. - * There are APIs for declaring aligned fields/variables, optimization - * hints, and run-time alignment checks. - * - * Briefly, macros ending with "_TO" take numeric values and are great - * when you know the value you would like to use. Macros ending with - * "_LIKE", on the other hand, accept a type and are used when you want - * to use the alignment of a type instead of hardcoding a value. - * - * Documentation for each section of the API is inline. - * - * True to form, MSVC is the main problem and imposes several - * limitations on the effectiveness of the APIs. Detailed descriptions - * of the limitations of each macro are inline, but in general: - * - * * On C11+ or C++11+ code written using this API will work. The - * ASSUME macros may or may not generate a hint to the compiler, but - * that is only an optimization issue and will not actually cause - * failures. - * * If you're using pretty much any compiler other than MSVC, - * everything should basically work as well as in C11/C++11. 
- */ - -#if !defined(SIMDE_ALIGN_H) -#define SIMDE_ALIGN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* I know this seems a little silly, but some non-hosted compilers - * don't have stddef.h, so we try to accomodate them. */ -#if !defined(SIMDE_ALIGN_SIZE_T_) - #if defined(__SIZE_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__SIZE_T_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #else - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #endif -#endif - -#if !defined(SIMDE_ALIGN_INTPTR_T_) - #if defined(__INTPTR_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ - #elif defined(__PTRDIFF_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ - #elif defined(__PTRDIFF_T_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #else - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #endif -#endif - -#if defined(SIMDE_ALIGN_DEBUG) - #if defined(__cplusplus) - #include - #else - #include - #endif -#endif - -/* SIMDE_ALIGN_OF(Type) - * - * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or - * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. - * It isn't defined everywhere (only when the compiler has some alignof- - * like feature we can use to implement it), but it should work in most - * modern compilers, as well as C11 and C++11. - * - * If we can't find an implementation for SIMDE_ALIGN_OF then the macro - * will not be defined, so if you can handle that situation sensibly - * you may need to sprinkle some ifdefs into your code. - */ -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (0 && HEDLEY_HAS_FEATURE(c_alignof)) - #define SIMDE_ALIGN_OF(Type) _Alignof(Type) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) - #define SIMDE_ALIGN_OF(Type) alignof(Type) -#elif \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - defined(__IBM__ALIGNOF__) || \ - defined(__clang__) - #define SIMDE_ALIGN_OF(Type) __alignof__(Type) -#elif \ - HEDLEY_IAR_VERSION_CHECK(8,40,0) - #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(19,0,0) - /* Probably goes back much further, but MS takes down their old docs. - * If you can verify that this works in earlier versions please let - * me know! */ - #define SIMDE_ALIGN_OF(Type) __alignof(Type) -#endif - -/* SIMDE_ALIGN_MAXIMUM: - * - * This is the maximum alignment that the compiler supports. You can - * define the value prior to including SIMDe if necessary, but in that - * case *please* submit an issue so we can add the platform to the - * detection code. - * - * Most compilers are okay with types which are aligned beyond what - * they think is the maximum, as long as the alignment is a power - * of two. 
Older versions of MSVC is the exception, so we need to cap - * the alignment requests at values that the implementation supports. - * - * XL C/C++ will accept values larger than 16 (which is the alignment - * of an AltiVec vector), but will not reliably align to the larger - * value, so so we cap the value at 16 there. - * - * If the compiler accepts any power-of-two value within reason then - * this macro should be left undefined, and the SIMDE_ALIGN_CAP - * macro will just return the value passed to it. */ -#if !defined(SIMDE_ALIGN_MAXIMUM) - #if defined(HEDLEY_MSVC_VERSION) - #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) - // Visual studio 2017 and newer does not need a max - #else - #if defined(_M_IX86) || defined(_M_AMD64) - #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 - #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) - /* VS 2010 is really a guess based on Wikipedia; if anyone can - * test with old VS versions I'd really appreciate it. */ - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 - #else - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif - #elif defined(_M_ARM) || defined(_M_ARM64) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 - #endif - #endif - #elif defined(HEDLEY_IBM_VERSION) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif -#endif - -/* You can mostly ignore these; they're intended for internal use. - * If you do need to use them please let me know; if they fulfill - * a common use case I'll probably drop the trailing underscore - * and make them part of the public API. */ -#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) - #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 - #define SIMDE_ALIGN_64_ 32 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 - #define SIMDE_ALIGN_64_ 16 - #define SIMDE_ALIGN_32_ 16 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 - #define SIMDE_ALIGN_64_ 8 - #define SIMDE_ALIGN_32_ 8 - #define SIMDE_ALIGN_16_ 8 - #define SIMDE_ALIGN_8_ 8 - #else - #error Max alignment expected to be >= 8 - #endif -#else - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 -#endif - -/** - * SIMDE_ALIGN_CAP(Alignment) - * - * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. - */ -#if defined(SIMDE_ALIGN_MAXIMUM) - #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) -#else - #define SIMDE_ALIGN_CAP(Alignment) (Alignment) -#endif - -/* SIMDE_ALIGN_TO(Alignment) - * - * SIMDE_ALIGN_TO is used to declare types or variables. It basically - * maps to the align attribute in most compilers, the align declspec - * in MSVC, or _Alignas/alignas in C11/C++11. - * - * Example: - * - * struct i32x4 { - * SIMDE_ALIGN_TO(16) int32_t values[4]; - * } - * - * Limitations: - * - * MSVC requires that the Alignment parameter be numeric; you can't do - * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is - * unfortunate because that's really how the LIKE macros are - * implemented, and I am not aware of a way to get anything like this - * to work without using the C11/C++11 keywords. 
- * - * It also means that we can't use SIMDE_ALIGN_CAP to limit the - * alignment to the value specified, which MSVC also requires, so on - * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. - * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, - * but should be safe to use on MSVC. - * - * All this is to say that, if you want your code to work on MSVC, you - * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of - * SIMDE_ALIGN_TO(8/16/32/64). - */ -#if \ - HEDLEY_HAS_ATTRIBUTE(aligned) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) -#elif \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) - #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) - #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) - /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); - * the alignment passed to the declspec has to be an integer. */ - #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE -#endif -#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) -#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) -#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) -#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) - -/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) - * - * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's - * std::assume_aligned, or __builtin_assume_aligned. It tells the - * compiler to assume that the provided pointer is aligned to an - * `Alignment`-byte boundary. - * - * If you define SIMDE_ALIGN_DEBUG prior to including this header then - * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't - * integrate with NDEBUG in this header, but it may be a good idea to - * put something like this in your code: - * - * #if !defined(NDEBUG) - * #define SIMDE_ALIGN_DEBUG - * #endif - * #include <.../simde-align.h> - */ -#if \ - HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ - HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ - __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ - __assume_aligned(simde_assume_aligned_t_, Alignment); \ - simde_assume_aligned_t_; \ - })) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) -#else - #if defined(__cplusplus) - template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) - #else - HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) - #endif - { - HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); - return ptr; - } - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) - #else - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) - #endif -#endif - -#if !defined(SIMDE_ALIGN_DEBUG) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) -#else - #include - #if defined(__cplusplus) - template - static HEDLEY_ALWAYS_INLINE - T* - simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #else - static HEDLEY_ALWAYS_INLINE - void* - simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #endif - { - if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { - fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", - file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), - HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), - HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); - } - - return ptr; - } - - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) - #else - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) - #endif -#endif - -/* SIMDE_ALIGN_LIKE(Type) - * SIMDE_ALIGN_LIKE_#(Type) - * - * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros - * except instead of an integer they take a type; basically, it's just - * a more convenient way to do something like: - * - * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - * - * The versions with a numeric suffix will fall back 
on using a numeric - * value in the event we can't use SIMDE_ALIGN_OF(Type). This is - * mainly for MSVC, where __declspec(align()) can't handle anything - * other than hard-coded numeric values. - */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) - #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) -#else - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 -#endif - -/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) - * - * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a - * type instead of a numeric value. */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) - #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) -#endif - -/* SIMDE_ALIGN_CAST(Type, Pointer) - * - * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try - * to silence warnings that some compilers may produce if you try - * to assign to a type with increased alignment requirements. - * - * Note that it does *not* actually attempt to tell the compiler that - * the pointer is aligned like the destination should be; that's the - * job of the next macro. This macro is necessary for stupid APIs - * like _mm_loadu_si128 where the input is a __m128i* but the function - * is specifically for data which isn't necessarily aligned to - * _Alignof(__m128i). - */ -#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ - Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_r_; \ - })) -#else - #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) -#endif - -/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) - * - * This is sort of like a combination of a reinterpret_cast and a - * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell - * the compiler that the pointer is aligned like the specified type - * and casts the pointer to the specified type while suppressing any - * warnings from the compiler about casting to a type with greater - * alignment requirements. - */ -#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) - -#endif /* !defined(SIMDE_ALIGN_H) */ -/* :: End simde/simde-align.h :: */ - -/* In some situations, SIMDe has to make large performance sacrifices - * for small increases in how faithfully it reproduces an API, but - * only a relatively small number of users will actually need the API - * to be completely accurate. The SIMDE_FAST_* options can be used to - * disable these trade-offs. - * - * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or - * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to - * enable some optimizations. Using -ffast-math and/or - * -ffinite-math-only will also enable the relevant options. If you - * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) - #define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) - #if defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_NANS - #elif defined(__FINITE_MATH_ONLY__) - #if __FINITE_MATH_ONLY__ - #define SIMDE_FAST_NANS - #endif - #endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_CONVERSION_RANGE -#endif - -/* Due to differences across platforms, sometimes it can be much - * faster for us to allow spurious floating point exceptions, - * or to no generate them when we should. 
*/ -#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_EXCEPTIONS -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) - #if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ - (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) - #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") - #else - #define SIMDE_REQUIRE_CONSTANT(arg) - #endif -#else - #define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) - /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which - * starts with a double-underscore. This is a system header so we have no - * control over it, but since it's a macro it will emit a diagnostic which - * prevents compilation with -Werror. */ - #if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ - _Static_assert(expr, message); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) - #endif -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) - #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#endif - -/* Statement exprs */ -#if \ - HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ - HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) -#endif - -/* This is just a convenience macro to make it easy to call a single - * function with a specific diagnostic disabled. 
*/ -#if defined(SIMDE_STATEMENT_EXPR_) - #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ - SIMDE_STATEMENT_EXPR_(({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - diagnostic \ - (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#endif - -#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) - #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") -#endif - -#if \ - (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) -# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -# define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -# if \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SCALAR -# define SIMDE_VECTOR_SUBSCRIPT -# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -# define SIMDE_VECTOR_SUBSCRIPT -# elif \ - HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# elif HEDLEY_HAS_ATTRIBUTE(vector_size) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SUBSCRIPT -# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) -# define SIMDE_VECTOR_SCALAR -# endif -# endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) - HEDLEY_DIAGNOSTIC_PUSH - /* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -# endif -# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif - -# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#else -# define SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_SAFELEN(l) -# define SIMDE_VECTORIZE_REDUCTION(r) -# define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if defined(SIMDE_NO_INLINE) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#elif defined(SIMDE_CONSTRAINED_COMPILATION) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static -#else -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if \ - HEDLEY_HAS_ATTRIBUTE(unused) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ - -#if defined(_MSC_VER) -# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -# define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -# define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -# define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -# define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# elif defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__s390x__) || defined(__zarch__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. 
*/ -# elif defined(_WIN32) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(sun) || defined(__sun) /* Solaris */ -# include -# if defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__APPLE__) -# include -# if defined(__LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -# include -# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -# include -# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# endif -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) - #define simde_bswap64(v) _byteswap_uint64(v) -#else - SIMDE_FUNCTION_ATTRIBUTES - uint64_t - simde_bswap64(uint64_t v) { - return - ((v & (((uint64_t) 0xff) << 56)) >> 56) | - ((v & (((uint64_t) 0xff) << 48)) >> 40) | - ((v & (((uint64_t) 0xff) << 40)) >> 24) | - ((v & (((uint64_t) 0xff) << 32)) >> 8) | - ((v & (((uint64_t) 0xff) << 24)) << 8) | - ((v & (((uint64_t) 0xff) << 16)) << 24) | - ((v & (((uint64_t) 0xff) << 8)) << 40) | - ((v & (((uint64_t) 0xff) )) << 56); - } -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -# error Unknown byte order; please file a bug -#else -# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -# define simde_endian_bswap64_be(value) simde_bswap64(value) -# define simde_endian_bswap64_le(value) (value) -# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -# define simde_endian_bswap64_be(value) (value) -# define simde_endian_bswap64_le(value) simde_bswap64(value) -# endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. 
*/ - -#if !defined(SIMDE_FLOAT32_TYPE) -# define SIMDE_FLOAT32_TYPE float -# define SIMDE_FLOAT32_C(value) value##f -#else -# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -# define SIMDE_FLOAT64_TYPE double -# define SIMDE_FLOAT64_C(value) value -#else -# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if defined(SIMDE_POLY8_TYPE) -# undef SIMDE_POLY8_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY8_TYPE poly8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value)) -#else -# define SIMDE_POLY8_TYPE uint8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value)) -#endif -typedef SIMDE_POLY8_TYPE simde_poly8; - -#if defined(SIMDE_POLY16_TYPE) -# undef SIMDE_POLY16_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY16_TYPE poly16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value)) -#else -# define SIMDE_POLY16_TYPE uint16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value)) -#endif -typedef SIMDE_POLY16_TYPE simde_poly16; - -#if defined(SIMDE_POLY64_TYPE) -# undef SIMDE_POLY64_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_POLY64_TYPE poly64_t -# define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull)) -#else -# define SIMDE_POLY64_TYPE uint64_t -# define SIMDE_POLY64_C(value) value ## ull -#endif -typedef SIMDE_POLY64_TYPE simde_poly64; - -#if defined(SIMDE_POLY128_TYPE) -# undef SIMDE_POLY128_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) -# define SIMDE_POLY128_TYPE poly128_t -# define SIMDE_POLY128_C(value) value -#elif defined(__SIZEOF_INT128__) -# define SIMDE_POLY128_TYPE __int128 -# define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value)) -#else -# define SIMDE_POLY128_TYPE uint64_t -# define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1 -#endif -typedef SIMDE_POLY128_TYPE simde_poly128; - -#if defined(__cplusplus) - typedef bool simde_bool; -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - typedef _Bool simde_bool; -#elif defined(bool) - typedef bool simde_bool; -#else - #include - typedef bool simde_bool; -#endif - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -# define SIMDE_CONVERT_FTOI(T,v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) \ - HEDLEY_DIAGNOSTIC_POP -#else -# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) -#else - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -# define 
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -# define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -# define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -# if \ - defined(HEDLEY_PGI_VERSION) || \ - defined(HEDLEY_MSVC_VERSION) -# define SIMDE_STDC_HOSTED 1 -# else -# define SIMDE_STDC_HOSTED 0 -# endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) - #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) - #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) - #endif -#endif -#if !defined(simde_memset) - #if HEDLEY_HAS_BUILTIN(__builtin_memset) - #define simde_memset(s, c, n) __builtin_memset(s, c, n) - #endif -#endif -#if !defined(simde_memcmp) - #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) - #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) - #endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) - #if !defined(SIMDE_NO_STRING_H) - #if defined(__has_include) - #if !__has_include() - #define SIMDE_NO_STRING_H - #endif - #elif (SIMDE_STDC_HOSTED == 0) - #define SIMDE_NO_STRING_H - #endif - #endif - - #if !defined(SIMDE_NO_STRING_H) - #include - #if !defined(simde_memcpy) - #define simde_memcpy(dest, src, n) memcpy(dest, src, n) - #endif - #if !defined(simde_memset) - #define simde_memset(s, c, n) memset(s, c, n) - #endif - #if !defined(simde_memcmp) - #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) - #endif - #else - /* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. 
*/ - #if !defined(simde_memcpy) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memcpy_(void* dest, const void* src, size_t len) { - char* dest_ = HEDLEY_STATIC_CAST(char*, dest); - const char* src_ = HEDLEY_STATIC_CAST(const char*, src); - for (size_t i = 0 ; i < len ; i++) { - dest_[i] = src_[i]; - } - } - #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) - #endif - - #if !defined(simde_memset) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memset_(void* s, int c, size_t len) { - char* s_ = HEDLEY_STATIC_CAST(char*, s); - char c_ = HEDLEY_STATIC_CAST(char, c); - for (size_t i = 0 ; i < len ; i++) { - s_[i] = c_; - } - } - #define simde_memset(s, c, n) simde_memset_(s, c, n) - #endif - - #if !defined(simde_memcmp) - SIMDE_FUNCTION_ATTRIBUTES - int - simde_memcmp_(const void *s1, const void *s2, size_t len) { - const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1); - const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2); - for (size_t i = 0 ; i < len ; i++) { - if (s1_[i] != s2_[i]) { - return (int) (s1_[i] - s2_[i]); - } - } - return 0; - } - #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) - #endif - #endif -#endif - -/*** Functions that quiet a signaling NaN ***/ - -static HEDLEY_INLINE -double -simde_math_quiet(double x) { - uint64_t tmp, mask; - if (!simde_math_isnan(x)) { - return x; - } - simde_memcpy(&tmp, &x, 8); - mask = 0x7ff80000; - mask <<= 32; - tmp |= mask; - simde_memcpy(&x, &tmp, 8); - return x; -} - -static HEDLEY_INLINE -float -simde_math_quietf(float x) { - uint32_t tmp; - if (!simde_math_isnanf(x)) { - return x; - } - simde_memcpy(&tmp, &x, 4); - tmp |= 0x7fc00000lu; - simde_memcpy(&x, &tmp, 4); - return x; -} - -#if defined(FE_ALL_EXCEPT) - #define SIMDE_HAVE_FENV_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_FENV_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_FENV_H -#endif - -#if defined(EXIT_FAILURE) - #define SIMDE_HAVE_STDLIB_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_STDLIB_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_STDLIB_H -#endif - -#if defined(__has_include) -# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -# include -# elif __has_include() -# include -# endif -# if __has_include() -# include -# endif -#elif SIMDE_STDC_HOSTED == 1 -# include -# include -#endif - -#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ - static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ - T_To \ - Name (T_From value) { \ - T_To r; \ - simde_memcpy(&r, &value, sizeof(r)); \ - return r; \ - } - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/check.h :: */ -/* Check (assertions) - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code.
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -#if !defined(_WIN32) -# define SIMDE_SIZE_MODIFIER "z" -# define SIMDE_CHAR_MODIFIER "hh" -# define SIMDE_SHORT_MODIFIER "h" -#else -# if defined(_M_X64) || defined(__amd64__) -# define SIMDE_SIZE_MODIFIER "I64" -# else -# define SIMDE_SIZE_MODIFIER "" -# endif -# define SIMDE_CHAR_MODIFIER "" -# define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) -# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ -# define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -# if defined(__has_include) -# if __has_include() -# include -# endif -# elif defined(SIMDE_STDC_HOSTED) -# if SIMDE_STDC_HOSTED == 1 -# include -# endif -# elif defined(__STDC_HOSTED__) -# if __STDC_HOSTETD__ == 1 -# include -# endif -# endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/debug-trap.h :: */ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define simde_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define simde_trap() __debugbreak() -# endif -#endif -#if !defined(simde_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define simde_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define simde_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define simde_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void simde_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define simde_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define simde_trap() raise(SIGTRAP) -# else -# define simde_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -# define simde_dbg_assert(expr) do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -# define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ -/* :: End simde/debug-trap.h :: */ - - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -# if defined(EOF) -# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) -# else -# define simde_errorf(format, ...) (simde_trap()) -# endif - HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -# if defined(SIMDE_CHECK_FAIL_DEFINED) -# define simde_assert(expr) -# else -# if defined(HEDLEY_ASSUME) -# define simde_assert(expr) HEDLEY_ASSUME(expr) -# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) -# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) -# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) -# define simde_assert(expr) __assume(expr) -# else -# define simde_assert(expr) -# endif -# endif -# define simde_assert_true(expr) simde_assert(expr) -# define simde_assert_false(expr) simde_assert(!(expr)) -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) -# define simde_assert_double_equal(a, b, precision) -# define simde_assert_string_equal(a, b) -# define simde_assert_string_not_equal(a, b) -# define simde_assert_memory_equal(size, a, b) -# define simde_assert_memory_not_equal(size, a, b) -#else -# define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ - -(simde_tmp_a_ - simde_tmp_b_) : \ - (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# include -# define simde_assert_string_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ - simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_string_not_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ - simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ - simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) \ - simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) \ - simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) \ - simde_assert_type(float, "f", a, op, b) 
-#define simde_assert_double(a, op, b) \ - simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void*, "p", a, op, b) - -#define simde_assert_int8(a, op, b) \ - simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) \ - simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) \ - simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) \ - simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ -/* :: End simde/check.h :: */ - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether they operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * we use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long long are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long.
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. - * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-f16.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if !defined(SIMDE_FLOAT16_H) -#define SIMDE_FLOAT16_H - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* Portable version which should work on pretty much any compiler. - * Obviously you can't rely on compiler support for things like - * conversion to/from 32-bit floats, so make sure you always use the - * functions and macros in this file! - * - * The portable implementations are (heavily) based on CC0 code by - * Fabian Giesen: (see also - * ). - * I have basically just modified it to get rid of some UB (lots of - * aliasing, right shifting a negative value), use fixed-width types, - * and work in C. */ -#define SIMDE_FLOAT16_API_PORTABLE 1 -/* _Float16, per C standard (TS 18661-3; - * ). */ -#define SIMDE_FLOAT16_API_FLOAT16 2 -/* clang >= 6.0 supports __fp16 as an interchange format on all - * targets, but only allows you to use them for arguments and return - * values on targets which have defined an ABI. We get around the - * restriction by wrapping the __fp16 in a struct, but we can't do - * that on Arm since it would break compatibility with the NEON F16 - * functions. */ -#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 -/* This is basically __fp16 as specified by Arm, where arugments and - * return values are raw __fp16 values not structs. */ -#define SIMDE_FLOAT16_API_FP16 4 - -/* Choosing an implementation. This is a bit rough, but I don't have - * any ideas on how to improve it. If you do, patches are definitely - * welcome. */ -#if !defined(SIMDE_FLOAT16_API) - #if defined(__ARM_FP16_FORMAT_IEEE) && (defined(SIMDE_ARM_NEON_FP16) || defined(__ARM_FP16_ARGS)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 - #elif !defined(__EMSCRIPTEN__) && !(defined(__clang__) && defined(SIMDE_ARCH_POWER)) && \ - !(defined(HEDLEY_MSVC_VERSION) && defined(__clang__)) && \ - !(defined(SIMDE_ARCH_MIPS) && defined(__clang__)) && \ - !(defined(__clang__) && defined(SIMDE_ARCH_RISCV64)) && ( \ - defined(SIMDE_X86_AVX512FP16_NATIVE) || \ - (defined(SIMDE_ARCH_X86_SSE2) && HEDLEY_GCC_VERSION_CHECK(12,0,0)) || \ - (defined(SIMDE_ARCH_AARCH64) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !defined(__cplusplus)) || \ - ((defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0)) || \ - (!(defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(6,0,0))) - /* We haven't found a better way to detect this. It seems like defining - * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then - * checking for defined(FLT16_MAX) should work, but both gcc and - * clang will define the constants even if _Float16 is not - * supported. Ideas welcome. 
*/ - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 - #elif defined(__FLT16_MIN__) && \ - (defined(__clang__) && \ - (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) \ - && !defined(SIMDE_ARCH_RISCV64)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI - #else - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE - #endif -#endif - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 - typedef _Float16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #if !defined(__cplusplus) - #define SIMDE_FLOAT16_C(value) value##f16 - #else - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(_Float16, (value)) - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - typedef struct { __fp16 value; } simde_float16; - #if defined(SIMDE_STATEMENT_EXPR_) && !defined(SIMDE_TESTS_H) - #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) - #else - #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) - #define SIMDE_FLOAT16_IS_SCALAR 1 - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 - typedef __fp16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - typedef struct { uint16_t value; } simde_float16; -#else - #error No 16-bit floating point API. -#endif - -#if \ - defined(SIMDE_VECTOR_OPS) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) - #define SIMDE_FLOAT16_VECTOR -#endif - -/* Reinterpret -- you *generally* shouldn't need these, they're really - * intended for internal use. However, on x86 half-precision floats - * get stuffed into a __m128i/__m256i, so it may be useful. */ - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - #define SIMDE_NANHF simde_uint16_as_float16(0x7E00) // a quiet Not-a-Number - #define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) - #define SIMDE_NINFINITYHF simde_uint16_as_float16(0xFC00) -#else - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF SIMDE_FLOAT16_C(__builtin_nanf16("")) - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_FLOAT16_C(SIMDE_MATH_NAN) - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(__builtin_inf16()) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-__builtin_inf16()) - #else - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-SIMDE_MATH_INFINITY) - #endif - #else - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF __builtin_nanf16("") - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_MATH_NAN - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF __builtin_inf16() - #define SIMDE_NINFINITYHF -(__builtin_inf16()) - #else - #define SIMDE_INFINITYHF HEDLEY_STATIC_CAST(simde_float16, SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF HEDLEY_STATIC_CAST(simde_float16, -SIMDE_MATH_INFINITY) - #endif - #endif -#endif - -/* Conversion -- convert between single-precision and half-precision - * floats. 
*/ -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float16 -simde_float16_from_float32 (simde_float32 value) { - simde_float16 res; - - #if \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) - res = HEDLEY_STATIC_CAST(simde_float16, value); - #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) - res.value = HEDLEY_STATIC_CAST(__fp16, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint32_t f32u = simde_float32_as_uint32(value); - static const uint32_t f32u_infty = UINT32_C(255) << 23; - static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; - static const uint32_t denorm_magic = - ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; - uint16_t f16u; - - uint32_t sign = f32u & (UINT32_C(1) << 31); - f32u ^= sign; - - /* NOTE all the integer compares in this function cast the operands - * to signed values to help compilers vectorize to SSE2, which lacks - * unsigned comparison instructions. This is fine since all - * operands are below 0x80000000 (we clear the sign bit). */ - - if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ - f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ - } else { /* (De)normalized number or zero */ - if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ - /* use a magic value to align our 10 mantissa bits at the bottom of - * the float. as long as FP addition is round-to-nearest-even this - * just works. */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); - - /* and one integer subtract of the bias later, we have our final float! */ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); - } else { - uint32_t mant_odd = (f32u >> 13) & 1; - - /* update exponent, rounding bias part 1 */ - f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); - /* rounding bias part 2 */ - f32u += mant_odd; - /* take the bits! */ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); - } - } - - f16u |= sign >> 16; - res = simde_uint16_as_float16(f16u); - #endif - - return res; -} - -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float32 -simde_float16_to_float32 (simde_float16 value) { - simde_float32 res; - - #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) - res = HEDLEY_STATIC_CAST(simde_float32, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint16_t half = simde_float16_as_uint16(value); - const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); - const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ - uint32_t f32u; - - f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ - uint32_t exp = shifted_exp & f32u; /* just the exponent */ - f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ - - /* handle exponent special cases */ - if (exp == shifted_exp) /* Inf/NaN? */ - f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ - else if (exp == 0) { /* Zero/Denormal? 
*/ - f32u += (1) << 23; /* extra exp adjust */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ - } - - f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ - res = simde_uint32_as_float32(f32u); - #endif - - return res; -} - -#ifdef SIMDE_FLOAT16_C - #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) -#else - #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) -#endif - -#if !defined(simde_isinfhf) && defined(simde_math_isinff) - #define simde_isinfhf(a) simde_math_isinff(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnanhf) && defined(simde_math_isnanf) - #define simde_isnanhf(a) simde_math_isnanf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnormalhf) && defined(simde_math_isnormalf) - #define simde_isnormalhf(a) simde_math_isnormalf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_issubnormalhf) && defined(simde_math_issubnormalf) - #define simde_issubnormalhf(a) simde_math_issubnormalf(simde_float16_to_float32(a)) -#endif - -#define simde_fpclassifyhf(a) simde_math_fpclassifyf(simde_float16_to_float32(a)) - -static HEDLEY_INLINE -uint8_t -simde_fpclasshf(simde_float16 v, const int imm8) { - uint16_t bits = simde_float16_as_uint16(v); - uint8_t negative = (bits >> 15) & 1; - uint16_t const ExpMask = 0x7C00; // [14:10] - uint16_t const MantMask = 0x03FF; // [9:0] - uint8_t exponent_all_ones = ((bits & ExpMask) == ExpMask); - uint8_t exponent_all_zeros = ((bits & ExpMask) == 0); - uint8_t mantissa_all_zeros = ((bits & MantMask) == 0); - uint8_t zero = exponent_all_zeros & mantissa_all_zeros; - uint8_t signaling_bit = (bits >> 9) & 1; - - uint8_t result = 0; - uint8_t snan = exponent_all_ones & (!mantissa_all_zeros) & (!signaling_bit); - uint8_t qnan = exponent_all_ones & (!mantissa_all_zeros) & signaling_bit; - uint8_t positive_zero = (!negative) & zero; - uint8_t negative_zero = negative & zero; - uint8_t positive_infinity = (!negative) & exponent_all_ones & mantissa_all_zeros; - uint8_t negative_infinity = negative & exponent_all_ones & mantissa_all_zeros; - uint8_t denormal = exponent_all_zeros & (!mantissa_all_zeros); - uint8_t finite_negative = negative & (!exponent_all_ones) & (!zero); - result = (((imm8 >> 0) & qnan) | \ - ((imm8 >> 1) & positive_zero) | \ - ((imm8 >> 2) & negative_zero) | \ - ((imm8 >> 3) & positive_infinity) | \ - ((imm8 >> 4) & negative_infinity) | \ - ((imm8 >> 5) & denormal) | \ - ((imm8 >> 6) & finite_negative) | \ - ((imm8 >> 7) & snan)); - return result; -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_FLOAT16_H) */ -/* :: End simde/simde-f16.h :: */ - -#if !defined(SIMDE_X86_F16C_H) -#define SIMDE_X86_F16C_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2018-2020 Evan Nemerson - * 2020 Michael R. Crusoe - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - */ - -#if !defined(SIMDE_X86_SSE_H) -#define SIMDE_X86_SSE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/mmx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_MMX_H) -#define SIMDE_X86_MMX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - #include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; - #endif - - #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; - #endif - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; - #endif - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; - #endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde__m64_from_private(simde__m64_private v) { - 
simde__m64 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64_private -simde__m64_to_private(simde__m64 v) { - simde__m64_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ - SIMDE_FUNCTION_ATTRIBUTES \ - simde__##simde_type \ - simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ - simde__##simde_type##_private r_; \ - r_.isax##_##fragment = value; \ - return simde__##simde_type##_from_private(r_); \ - } \ - \ - SIMDE_FUNCTION_ATTRIBUTES \ - source_type \ - simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ - simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ - return r_.isax##_##fragment; \ - } - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) -#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) -# define _m_paddb(a, b) simde_m_paddb(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - 
r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) -# define _m_paddw(a, b) simde_mm_add_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) -# define _m_paddd(a, b) simde_mm_add_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { - r_.i8[i] = INT8_MAX; - } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { - r_.i8[i] = INT8_MIN; - } else { - r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) -# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu8(a, 
b) simde_mm_adds_pu8(a, b) -# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_and_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - r_.i64[0] = a_.i64[0] & b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -# define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - 
r_.i32f = ~a_.i32f & b_.i32f; - #else - r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) -# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) -# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) -# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) -# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) -# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) -# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtm64_si64 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtm64_si64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i64[0]; - #endif - #endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -# define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi32_si64 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[2] = { a, 0 }; - r_.neon_i32 = vld1_s32(av); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -# define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi64_m64 (int64_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtsi64_m64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); - #else - r_.i64[0] = a; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi64_si32 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_empty (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); - #else - /* noop */ - #endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_empty() simde_mm_empty() -# define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_or_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - r_.i64[0] = a_.i64[0] | b_.i64[0]; 
- #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -# define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to 
UINT8_MAX */ - const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); - - /* Elements which are within the acceptable range */ - const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); - const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); - - /* Final values as 16-bit integers */ - const int16x8_t values = vorrq_s16(le_max, gt_max); - - r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else if (a_.i16[i] < 0) { - r_.u8[i] = 0; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] > UINT8_MAX) { - r_.u8[i + 4] = UINT8_MAX; - } else if (b_.i16[i] < 0) { - r_.u8[i + 4] = 0; - } else { - r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) -# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i8 = vld1_s8(v); - #else - r_.i8[0] = e0; - r_.i8[1] = e1; - r_.i8[2] = e2; - r_.i8[3] = e3; - r_.i8[4] = e4; - r_.i8[5] = e5; - r_.i8[6] = e6; - r_.i8[7] = e7; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m64_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi8( - HEDLEY_STATIC_CAST(int8_t, e7), - HEDLEY_STATIC_CAST(int8_t, e6), - HEDLEY_STATIC_CAST(int8_t, e5), - HEDLEY_STATIC_CAST(int8_t, e4), - HEDLEY_STATIC_CAST(int8_t, e3), - HEDLEY_STATIC_CAST(int8_t, e2), - HEDLEY_STATIC_CAST(int8_t, e1), - HEDLEY_STATIC_CAST(int8_t, e0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u8 = vld1_u8(v); - #else - r_.u8[0] = e0; - r_.u8[1] = e1; - r_.u8[2] = e2; - r_.u8[3] = e3; - r_.u8[4] = e4; - r_.u8[5] = e5; - r_.u8[6] = e6; - r_.u8[7] = e7; - #endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi16(e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; - r_.neon_i16 = vld1_s16(v); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - #endif - - return 
simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi16( - HEDLEY_STATIC_CAST(int16_t, e3), - HEDLEY_STATIC_CAST(int16_t, e2), - HEDLEY_STATIC_CAST(int16_t, e1), - HEDLEY_STATIC_CAST(int16_t, e0) - ); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; - r_.neon_u16 = vld1_u16(v); -#else - r_.u16[0] = e0; - r_.u16[1] = e1; - r_.u16[2] = e2; - r_.u16[3] = e3; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32( - HEDLEY_STATIC_CAST(int32_t, e1), - HEDLEY_STATIC_CAST(int32_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; - r_.neon_u32 = vld1_u32(v); -#else - r_.u32[0] = e0; - r_.u32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi32 (int32_t e1, int32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32(e1, e0); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; - r_.neon_i32 = vld1_s32(v); -#else - r_.i32[0] = e0; - r_.i32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pi64 (int64_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; - r_.neon_i64 = vld1_s64(v); -#else - r_.i64[0] = e0; -#endif - - return simde__m64_from_private(r_); -} - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; - r_.neon_f32 = vld1_f32(v); -#else - r_.f32[0] = e0; - r_.f32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi8 (int8_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi8(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i8 = vmov_n_s8(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi8(a, a, a, a, a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi16 (int16_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi16(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i16 = vmov_n_s16(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi16(a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi32 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi32(a); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i32 = vmov_n_s32(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi16(e3, e2, e1, e0); - #else - return simde_mm_set_pi16(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi32 (int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi32(e1, e0); - #else - return simde_mm_set_pi32(e0, e1); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setzero_si64 (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setzero_si64(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_u32 = vmov_n_u32(0); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(0, 0); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_si64() simde_mm_setzero_si64() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_load_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_loadu_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(mem_addr, &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_setone_si64 (void) { - return simde_mm_set1_pi32(~INT32_C(0)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) 
- return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) -# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) -# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psllh_s(a_.mmi_i16, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) -# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi32(a, count); - #else - simde__m64_private r_; - 
simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) -# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_slli_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); - #else - r_.u64[0] = a_.u64[0] << count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) -# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 << count_.i64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] << count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) -# define _m_psllq(a, count) simde_mm_sll_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) - return simde_mm_setzero_si64(); - - r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { - r_.u16[i] = a_.u16[i] >> count_.u64[0]; - } - 
#endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) -# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { - r_.u32[i] = a_.u32[i] >> count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) -# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) -# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) -# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_si64(a, count); 
- #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> count; - #else - r_.u64[0] = a_.u64[0] >> count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) -# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 >> count_.u64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] >> count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) -# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrah_s(a_.mmi_i16, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) -# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psraw_s(a_.mmi_i32, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) 
-# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) -# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int32_t cnt = (count_.u64[0] > 31) ? 31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) -# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) -# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); - #elif 
defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) -# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) -# define _m_psubd(a, b) simde_mm_sub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { - r_.i8[i] = INT8_MIN; - } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) -# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const int32_t x = a_.u8[i] - b_.u8[i]; - if (x < 0) { - r_.u8[i] = 0; - } else if (x > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) -# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { - r_.i16[i] = SHRT_MIN; - } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) -# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - const int x = a_.u16[i] - b_.u16[i]; - if (x < 0) { - r_.u16[i] = 0; - } else if (x > UINT16_MAX) { - r_.u16[i] = UINT16_MAX; - } else { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) -# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); - #else - r_.i8[0] = a_.i8[4]; - r_.i8[1] = b_.i8[4]; - r_.i8[2] = a_.i8[5]; - r_.i8[3] = b_.i8[5]; - r_.i8[4] = a_.i8[6]; - r_.i8[5] = b_.i8[6]; - r_.i8[6] = a_.i8[7]; - r_.i8[7] = b_.i8[7]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) -# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); - #else - r_.i16[0] = a_.i16[2]; - r_.i16[1] = b_.i16[2]; - r_.i16[2] = a_.i16[3]; - r_.i16[3] = b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) -# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[1]; - r_.i32[1] = b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) -# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); - #else - r_.i8[0] = a_.i8[0]; - r_.i8[1] = b_.i8[0]; - r_.i8[2] = a_.i8[1]; - r_.i8[3] = b_.i8[1]; - r_.i8[4] = a_.i8[2]; - r_.i8[5] = b_.i8[2]; - r_.i8[6] = a_.i8[3]; - r_.i8[7] = b_.i8[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) -# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = 
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); - #else - r_.i16[0] = a_.i16[0]; - r_.i16[1] = b_.i16[0]; - r_.i16[2] = a_.i16[1]; - r_.i16[3] = b_.i16[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) -# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); - #else - r_.i32[0] = a_.i32[0]; - r_.i32[1] = b_.i32[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) -# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_xor_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - r_.u64[0] = a_.u64[0] ^ b_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) -# define _m_pxor(a, b) simde_mm_xor_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_m_to_int (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _m_to_int(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _m_to_int(a) simde_m_to_int(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_MMX_H) */ -/* :: End simde/x86/mmx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) - #define NOMINMAX - #include -#endif - -#if defined(__ARM_ACLE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) 
SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_ALIGN_TO_16 __m128 n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v16i8 lsx_i8; - v8i16 lsx_i16; - v4i32 lsx_i32; - v2i64 lsx_i64; - v16u8 lsx_u8; - v8u16 
lsx_u16; - v4u32 lsx_u32; - v2u64 lsx_u64; - v4f32 lsx_f32; - v2f64 lsx_f64; - #endif -} simde__m128_private; - -#if defined(SIMDE_X86_SSE_NATIVE) - typedef __m128 simde__m128; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef float32x4_t simde__m128; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; -#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - typedef v4f32 simde__m128; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128_private simde__m128; -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - typedef simde__m128 __m128; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde__m128_from_private(simde__m128_private v) { - simde__m128 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128_private -simde__m128_to_private(simde__m128 v) { - simde__m128_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(float) - simde__m128_to_altivec_f32(simde__m128 value) { - simde__m128_private r_ = simde__m128_to_private(value); - return r_.altivec_f32; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { - simde__m128_private r_; - r_.altivec_f32 = 
value; - return simde__m128_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); -#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ - -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) -#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ - -enum { - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, - SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, - SIMDE_MM_ROUND_UP = _MM_ROUND_UP, - SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO - #else - SIMDE_MM_ROUND_NEAREST = 0x0000, - SIMDE_MM_ROUND_DOWN = 0x2000, - SIMDE_MM_ROUND_UP = 0x4000, - SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 - #endif -}; -#if defined(_MM_ROUND_MASK) -# define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK -#else -# define SIMDE_MM_ROUND_MASK (0x6000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK -#endif - -#if defined(_MM_FROUND_TO_NEAREST_INT) -# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT -# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF -# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF -# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO -# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION - -# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC -# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC -#else -# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 -# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 -# define SIMDE_MM_FROUND_TO_POS_INF 0x02 -# define SIMDE_MM_FROUND_TO_ZERO 0x03 -# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 - -# define SIMDE_MM_FROUND_RAISE_EXC 0x00 -# define SIMDE_MM_FROUND_NO_EXC 0x08 -#endif - -#define SIMDE_MM_FROUND_NINT \ - (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_FLOOR \ - (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_CEIL \ - (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_TRUNC \ - (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_RINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_NEARBYINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) - -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) -# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT -# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF -# define _MM_FROUND_TO_POS_INF 
SIMDE_MM_FROUND_TO_POS_INF -# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO -# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION -# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC -# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT -# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR -# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL -# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC -# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT -# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT -#endif - -#if defined(_MM_EXCEPT_INVALID) -# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID -#else -# define SIMDE_MM_EXCEPT_INVALID (0x0001) -#endif -#if defined(_MM_EXCEPT_DENORM) -# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM -#else -# define SIMDE_MM_EXCEPT_DENORM (0x0002) -#endif -#if defined(_MM_EXCEPT_DIV_ZERO) -# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO -#else -# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) -#endif -#if defined(_MM_EXCEPT_OVERFLOW) -# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW -#else -# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) -#endif -#if defined(_MM_EXCEPT_UNDERFLOW) -# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW -#else -# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) -#endif -#if defined(_MM_EXCEPT_INEXACT) -# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT -#else -# define SIMDE_MM_EXCEPT_INEXACT (0x0020) -#endif -#if defined(_MM_EXCEPT_MASK) -# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK -#else -# define SIMDE_MM_EXCEPT_MASK \ - (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ - SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ - SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID - #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM - #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO - #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW - #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW - #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT - #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK -#endif - -#if defined(_MM_MASK_INVALID) -# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID -#else -# define SIMDE_MM_MASK_INVALID (0x0080) -#endif -#if defined(_MM_MASK_DENORM) -# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM -#else -# define SIMDE_MM_MASK_DENORM (0x0100) -#endif -#if defined(_MM_MASK_DIV_ZERO) -# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO -#else -# define SIMDE_MM_MASK_DIV_ZERO (0x0200) -#endif -#if defined(_MM_MASK_OVERFLOW) -# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW -#else -# define SIMDE_MM_MASK_OVERFLOW (0x0400) -#endif -#if defined(_MM_MASK_UNDERFLOW) -# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW -#else -# define SIMDE_MM_MASK_UNDERFLOW (0x0800) -#endif -#if defined(_MM_MASK_INEXACT) -# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT -#else -# define SIMDE_MM_MASK_INEXACT (0x1000) -#endif -#if defined(_MM_MASK_MASK) -# define SIMDE_MM_MASK_MASK _MM_MASK_MASK -#else -# define SIMDE_MM_MASK_MASK \ - (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ - SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ - SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID - #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM - #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO - #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW - #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW - #define _MM_MASK_INEXACT 
SIMDE_MM_MASK_INEXACT - #define _MM_MASK_MASK SIMDE_MM_MASK_MASK -#endif - -#if defined(_MM_FLUSH_ZERO_MASK) -# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK -#else -# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_ON) -# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON -#else -# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_OFF) -# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF -#else -# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK - #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON - #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_ROUNDING_MODE(void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _MM_GET_ROUNDING_MODE(); - #elif defined(SIMDE_HAVE_FENV_H) - unsigned int vfe_mode; - - switch (fegetround()) { - #if defined(FE_TONEAREST) - case FE_TONEAREST: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case FE_TOWARDZERO: - vfe_mode = SIMDE_MM_ROUND_DOWN; - break; - #endif - - #if defined(FE_UPWARD) - case FE_UPWARD: - vfe_mode = SIMDE_MM_ROUND_UP; - break; - #endif - - #if defined(FE_DOWNWARD) - case FE_DOWNWARD: - vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; - break; - #endif - - default: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - } - - return vfe_mode; - #else - return SIMDE_MM_ROUND_NEAREST; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_ROUNDING_MODE(a); - #elif defined(SIMDE_HAVE_FENV_H) - int fe_mode = FE_TONEAREST; - - switch (a) { - #if defined(FE_TONEAREST) - case SIMDE_MM_ROUND_NEAREST: - fe_mode = FE_TONEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case SIMDE_MM_ROUND_TOWARD_ZERO: - fe_mode = FE_TOWARDZERO; - break; - #endif - - #if defined(FE_DOWNWARD) - case SIMDE_MM_ROUND_DOWN: - fe_mode = FE_DOWNWARD; - break; - #endif - - #if defined(FE_UPWARD) - case SIMDE_MM_ROUND_UP: - fe_mode = FE_UPWARD; - break; - #endif - - default: - return; - } - - fesetround(fe_mode); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - 
SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. */ - #if \ - defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_f32 = vrndiq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndnq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndmq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); - #elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); - #elif 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndpq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); - #elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); - #elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) -#else - #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps(e3, e2, e1, e0); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; - r_.neon_f32 = vld1q_f32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps1 (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps1(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - (void) a; - return vec_splats(a); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - return (simde__m128)__lsx_vldrepl_w(&a, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_splat(a); - #else - return simde_mm_set_ps(a, a, a, a); - #endif -} -#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps1(a) simde_mm_set_ps1(a) -# define _mm_set1_ps(a) simde_mm_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_move_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_move_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = 
vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; - r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); - #else - r_.f32[0] = b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_broadcastlow_ps(simde__m128 a) { - /* This function broadcasts the first element in the inpu vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_ss functions since there may be garbage in the upper lanes. */ - - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_shuffle_ps(a, a, 0); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = 
simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - // the upper values in the result must be the remnants of . - r_.neon_f32 = vaddq_f32(a_.neon_f32, value); - #else - r_.f32[0] = a_.f32[0] + b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_and_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_and_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_andnot_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_xor_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_xor_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for 
(size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_or_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_or_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_not_ps(simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* Note: we use ints instead of floats because we don't want cmpeq - * to return false for (NaN, NaN) */ - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - mask_ = simde__m128_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint32_t wa SIMDE_VECTOR(16); - uint32_t wb SIMDE_VECTOR(16); - uint32_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) -# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint16_t wa SIMDE_VECTOR(16); - uint16_t wb SIMDE_VECTOR(16); - uint16_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) -# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_abs_ps(simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - simde_float32 mask_; - uint32_t u32_ = UINT32_C(0x7FFFFFFF); - simde_memcpy(&mask_, &u32_, sizeof(u32_)); - return _mm_and_ps(_mm_set1_ps(mask_), a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vabsq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_abs(a_.altivec_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpge_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpge_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpgt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpgt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vandq_u32(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpunord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpunord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] == b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] >= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] > b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] <= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] < b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] != b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { - simde__m128_private - r_, - dest_ = simde__m128_to_private(dest), - src_ = simde__m128_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); - r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t sign_pos = wasm_f32x4_splat(-0.0f); - r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); - #else - r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); - r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; - r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); - #elif defined(SIMDE_IEEE754_STORAGE) - (void) src_; - (void) dest_; - simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); - r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { - return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_pi2ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); - #else - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_si2ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - r_.i32[1] = a_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvt_ss2si (simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_ss2si(a); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); - #else - simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.i16[i]; - r_.f32[i] = v; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32x2_ps(a, b); - #else - simde__m128_private r_; - simde__m64_private - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); - SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); - #else - r_.f32[0] = (simde_float32) a_.i32[0]; - r_.f32[1] = (simde_float32) a_.i32[1]; - r_.f32[2] = (simde_float32) b_.i32[0]; - r_.f32[3] = (simde_float32) b_.i32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_cvtpi8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); - r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); - r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi16 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi16(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi32(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi8 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi8(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) - /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to - * i16, combine with an all-zero vector of i16 (which will become the upper - * half), narrow to i8. 
*/ - float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); - float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); - float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); - r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) - r_.i8[i] = INT8_MAX; - else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) - r_.i8[i] = INT8_MIN; - else - r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); - } - /* Note: the upper half is undefined */ - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.u16[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtsi32_ss(a, b); - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_ss(a, b); - #else - return _mm_cvtsi64x_ss(a, b); - #endif - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - 
return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm_cvtss_f32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtss_f32(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_f32(a_.neon_f32, 0); - #else - return a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtss_si32 (simde__m128 a) { - return simde_mm_cvt_ss2si(a); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtss_si64(a); - #else - return _mm_cvtss_si64x(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); - #else - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) -# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtt_ss2si (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtt_ss2si(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - simde_float32 v = a_.f32[0]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, v); - #endif - #endif - #endif -} -#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) -# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) - #if defined(__PGI) - return _mm_cvttss_si64x(a); - #else - return _mm_cvttss_si64(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); - float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); - r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) - r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = a_.f32[0] / b_.f32[0]; - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_mm_extract_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private a_ = simde__m64_to_private(a); - return a_.i16[imm8]; -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) -#endif -#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) -# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private - a_ = simde__m64_to_private(a); - - a_.i16[imm8] = i; - - return simde__m64_from_private(a_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) -#endif -#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps(mem_addr); -#else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); - #endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load1_ps (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps1(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_dup_f32(mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); - #else - r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ss (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ss(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); - #else - r_.f32[0] = *mem_addr; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); - #else - simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -/* The SSE documentation says that there are no alignment requirements - for mem_addr. Unfortunately they used the __m64 type for the argument - which is supposed to be 8-byte aligned, so some compilers (like clang - with -Wcast-align) will generate a warning if you try to cast, say, - a simde_float32* to a simde__m64* for this function. - - I think the choice of argument type is unfortunate, but I do think we - need to stick to it here. 
If there is demand I can always add something - like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vld1_f32( - HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); - #else - simde__m64_private b_; - simde_memcpy(&b_, mem_addr, sizeof(b_)); - r_.i32[0] = b_.i32[0]; - r_.i32[1] = b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadr_ps(mem_addr); - #else - simde__m128_private - r_, - v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrev64q_f32(v_.neon_f32); - r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_reve(v_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); - #else - r_.f32[0] = v_.f32[3]; - r_.f32[1] = v_.f32[2]; - r_.f32[2] = v_.f32[1]; - r_.f32[3] = v_.f32[0]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadu_ps(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m64_private - a_ = simde__m64_to_private(a), - mask_ = simde__m64_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) - if (mask_.i8[i] < 0) - mem_addr[i] = a_.i8[i]; - #endif -} -#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) -# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) - r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); - #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) - r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) -# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) -# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - #if defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); - #else - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); - #endif - #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); - r_.f32 = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.f32), - ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | - (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) - ) - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) -# define _m_pminub(a, b) simde_mm_min_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movehl_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vzip2q_u64(b_.neon_u64, a_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a32 = vget_high_f32(a_.neon_f32); - float32x2_t b32 = vget_high_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(b32, a32); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergel(b_.altivec_i64, a_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); - #else - r_.f32[0] = b_.f32[2]; - r_.f32[1] = b_.f32[3]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movelh_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = 
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] == b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] == b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] >= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] >= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] > b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] > b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] <= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] <= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] < b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] < b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] != b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] != b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) -# if defined(__has_builtin) -# if __has_builtin(__builtin_ia32_undef128) -# define SIMDE_HAVE_UNDEFINED128 -# endif -# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) -# define SIMDE_HAVE_UNDEFINED128 -# endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpackhi_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_high_f32(a_.neon_f32); - float32x2_t b1 = vget_high_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = 
__lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpacklo_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_low_f32(a_.neon_f32); - float32x2_t b1 = vget_low_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) || \ - defined(SIMDE_VECTOR_SUBSCRIPT)) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private a_ = simde__m64_to_private(a); - vst1_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), a_.neon_i64); - #else - simde__m64_private* - dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), - a_ = simde__m64_to_private(a); - - dest->i64[0] = a_.i64[0]; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || defined(SIMDE_LOONGARCH_LSX_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_ASSUME_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_ps(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_ps(mem_addr, a) 
simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ - row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - } while (0) -#else - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ - SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ - row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ - row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ - row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ - row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ - } while (0) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE_H) */ -/* :: End simde/x86/sse.h :: */ -#if !defined(SIMDE_X86_AVX_H) -#define SIMDE_X86_AVX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_SSE4_2_H) -#define SIMDE_X86_SSE4_2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#if !defined(SIMDE_X86_SSE4_1_H) -#define SIMDE_X86_SSE4_1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/ssse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSSE3_H) -#define SIMDE_X86_SSSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSE3_H) -#define SIMDE_X86_SSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. 
Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - * 2017 Hasindu Gamaarachchi - * 2018 Jeff Daily - */ - -#if !defined(SIMDE_X86_SSE2_H) -#define SIMDE_X86_SSE2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128i n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - #if defined(__ARM_FP16_FORMAT_IEEE) - SIMDE_ALIGN_TO_16 float16x8_t neon_f16; - #endif - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - 
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128i_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128d n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 
msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128d_private; - -#if defined(SIMDE_X86_SSE2_NATIVE) - typedef __m128i simde__m128i; - typedef __m128d simde__m128d; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int64x2_t simde__m128i; -# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - typedef float64x2_t simde__m128d; -# elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -# else - typedef simde__m128d_private simde__m128d; -# endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128i; - typedef v128_t simde__m128d; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; - #else - typedef simde__m128d_private simde__m128d; - #endif -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128i_private simde__m128i; - typedef simde__m128d_private simde__m128d; -#endif - -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - typedef simde__m128i __m128i; - typedef simde__m128d __m128d; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde__m128i_from_private(simde__m128i_private v) { - simde__m128i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i_private -simde__m128i_to_private(simde__m128i v) { - simde__m128i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde__m128d_from_private(simde__m128d_private v) { - simde__m128d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d_private -simde__m128d_to_private(simde__m128d v) { - simde__m128d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, 
f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(double) - simde__m128d_to_altivec_f64(simde__m128d value) { - simde__m128d_private r_ = simde__m128d_to_private(value); - return r_.altivec_f64; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { - simde__m128d_private r_; - r_.altivec_f64 = value; - return simde__m128d_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) - #endif - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_pd(e1, e0); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make(e0, e1); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; - r_.neon_f64 = vld1q_f64(data); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_pd(a); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_pd(a) simde_mm_set1_pd(a) - #define _mm_set_pd1(a) simde_mm_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_abs_pd(simde__m128d a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - simde_float64 mask_; - uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); - simde_memcpy(&mask_, &u64_, sizeof(u64_)); - return _mm_and_pd(_mm_set1_pd(mask_), a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vabsq_f64(a_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_abs(a_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_not_pd(simde__m128d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castpd_si128(a); - return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
[Vendored SIMDE SSE2 emulation header, deletion continued. The removed lines in this stretch of the patch (all prefixed "-") are SIMDE's portable shims, each defined with SIMDE_FUNCTION_ATTRIBUTES and an _mm_* alias under SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES:
  - the non-native fallback of simde_mm_blendv_pd and the add family: simde_mm_add_epi8/16/32/64, simde_mm_add_pd, simde_mm_move_sd, simde_x_mm_broadcastlow_pd, simde_mm_add_sd, simde_mm_add_si64, and the saturating simde_mm_adds_epi8/16 and simde_mm_adds_epu8/16;
  - bitwise ops simde_mm_and_pd, simde_mm_and_si128, simde_mm_andnot_pd, simde_mm_andnot_si128, simde_mm_xor_pd; averages simde_mm_avg_epu8/16; simde_mm_setzero_si128;
  - byte shifts simde_mm_bslli_si128 / simde_mm_bsrli_si128 (with the _mm_slli_si128 / _mm_srli_si128 aliases) and simde_mm_clflush;
  - scalar-double ordered comparisons simde_mm_comieq/ge/gt/le/lt/neq_sd; sign helpers simde_x_mm_copysign_pd and simde_x_mm_xorsign_pd; cast intrinsics simde_mm_castpd_ps, simde_mm_castpd_si128, simde_mm_castps_pd, simde_mm_castps_si128, simde_mm_castsi128_pd, simde_mm_castsi128_ps;
  - the packed and scalar compare families simde_mm_cmpeq_epi8/16/32, simde_mm_cmpeq_pd/sd, simde_mm_cmpneq_pd/sd, simde_mm_cmplt_epi8/16/32, simde_mm_cmplt_pd/sd, simde_mm_cmple_pd/sd, simde_mm_cmpgt_epi8/16/32, simde_mm_cmpgt_pd/sd, simde_mm_cmpge_pd/sd, the negated forms simde_mm_cmpngt/cmpnge/cmpnlt/cmpnle_pd/sd, and simde_mm_cmpord/cmpunord_pd/sd;
  - conversions simde_mm_cvtsd_f64, simde_mm_cvtepi32_pd, simde_mm_cvtepi32_ps, and the start of simde_mm_cvtpd_pi32 (the deletion continues past this section).
Every shim follows the same structure: return the native x86 intrinsic when SIMDE_X86_SSE2_NATIVE (or SSE4.1/MMX where required) is defined, otherwise emulate the operation through NEON, AltiVec/VSX, WASM SIMD128, z/Arch vector, GCC vector-extension, or a plain SIMDE_VECTORIZE scalar loop. The entire file is removed by this patch; nothing in the package references it once the SIMDE dependency is dropped.]
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) - return _mm_cvtpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvtpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) - float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; - r_.f32 = - __builtin_shufflevector( - __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, - 0, 1, 2, 3 - ); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); - r_.f32[2] = SIMDE_FLOAT32_C(0.0); - r_.f32[3] = SIMDE_FLOAT32_C(0.0); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtpi32_pd (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_pd(a); - #else - simde__m128d_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) - a_ = simde__m128_to_private(a); - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - #else - a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_pd(a); - #else - simde__m128d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 v = simde_math_round(a_.f64[0]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsd_si64x(a); - #else - return _mm_cvtsd_si64(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); - #endif -} -#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) - #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); - - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i]; - } - #endif - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_x_mm_cvtsi128_si16 (simde__m128i a) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s16(a_.neon_i16, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i16, 0); - #else - return a_.i16[0]; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi128_si32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi128_si32(a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s32(a_.neon_i32, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i32, 0); - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsi128_si64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsi128_si64x(a); - #else - return _mm_cvtsi128_si64(a); - #endif - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); - #endif - return a_.i64[0]; - #endif -} -#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) - #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_sd(a, b); - #else - simde__m128d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.i64[1] = a_.i64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cvtsi16_si128 (int16_t a) { - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); - #else - r_.i16[0] = a; - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - r_.i16[4] = 0; - r_.i16[5] = 0; - r_.i16[6] = 0; - r_.i16[7] = 0; - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi32_si128 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_si128(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_sd(a, b); - #else - return _mm_cvtsi64x_sd(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) - #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi64_si128 (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_si128(a); - #else - return _mm_cvtsi64x_si128(a); - #endif - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i64x2_make(a, 0); - #else - r_.i64[0] = a; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) - #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtss_sd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); - return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); - - return simde__m128d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvttpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvttpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = a_.f64[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvttpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - /* Values below INT32_MIN saturate anyways, so we don't need to - * test for that. 
*/ - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = - vandq_u32( - vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), - vceqq_f32(a_.neon_f32, a_.neon_f32) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); - #endif - - r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - v128_t valid_input = - wasm_v128_and( - wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), - wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); - #endif - - r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); - #endif - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; - - __typeof__(r_.i32) valid_input = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.i32), - (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) - ); - #elif !defined(SIMDE_FAST_NANS) - __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); - #endif - - __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; - r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); - #endif - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvttsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvttsd_si64(a); - #else - return _mm_cvttsd_si64x(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); - #endif -} -#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) - #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] / b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - uint16_t r; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); - #else - r = a_.u16[imm8 & 7]; - #endif - - return HEDLEY_STATIC_CAST(int32_t, r); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) - #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m128i_private a_ = simde__m128i_to_private(a); - a_.i16[imm8 & 7] = i; - return simde__m128i_from_private(a_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load1_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load1_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); - #else - return simde_mm_set1_pd(*mem_addr); - #endif -} -#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) - #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_sd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_sd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = UINT64_C(0); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_load_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadh_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); - #else - simde_float64 t; - - simde_memcpy(&t, mem_addr, sizeof(t)); - r_.f64[0] = a_.f64[0]; - r_.f64[1] = t; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_epi64(mem_addr); - #else - simde__m128i_private r_; - - int64_t value; - simde_memcpy(&value, mem_addr, sizeof(value)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); - #else - r_.i64[0] = value; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vld1_f64( - HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = a_.u64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadr_pd(mem_addr); - #else - simde__m128d_private - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); - r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t tmp = 
wasm_v128_load(mem_addr); - r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); - #else - r_.f64[0] = mem_addr[1]; - r_.f64[1] = mem_addr[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld1q_f64(mem_addr); - #else - simde__m128d_private r_; - - simde_memcpy(&r_, mem_addr, sizeof(r_)); - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi8 - #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi16 - #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi32(void const * mem_addr) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi32 - #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi64 - #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si128 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); - #else - simde__m128i_private r_; - - #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_PACKED_ - struct simde_mm_loadu_si128_s { - __typeof__(r_) v; - } __attribute__((__packed__, __may_alias__)); - r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_madd_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpaddq_s32(pl, ph); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); - int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); - int32x2_t rh = 
vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); - r_.neon_i32 = vcombine_s32(rl, rh); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) a32, b32, p32; - SIMDE_CONVERT_VECTOR_(a32, a_.i16); - SIMDE_CONVERT_VECTOR_(b32, b_.i16); - p32 = a32 * b32; - r_.i32 = - __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + - __builtin_shufflevector(p32, p32, 1, 3, 5, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - if (mask_.u8[i] & 0x80) { - mem_addr[i] = a_.i8[i]; - } - } - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) - /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ - return _mm_movemask_epi8(a); - #else - int32_t r = 0; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ - static const uint8_t md[16] = { - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - }; - - /* Extend sign bit over entire lane */ - uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); - /* Clear all but the bit we're interested in. 
*/ - uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); - /* Alternate bytes from low half and high half */ - uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); - uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vaddvq_u16(x); - #else - uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[15 - i] >> 7) << (15 - i); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_movemask_pd(a); - #else - int32_t r = 0; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + - (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 
-simde_mm_movepi64_pi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movepi64_pi64(a); - #else - simde__m64_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i64 = vget_low_s64(a_.neon_i64); - #else - r_.i64[0] = a_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_movpi64_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movpi64_epi64(a); - #else - simde__m128i_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_move_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_move_epi64(a) simde_mm_move_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t a_lo = vmovn_u64(a_.neon_u64); - uint32x2_t b_lo = vmovn_u64(b_.neon_u64); - r_.neon_u64 = vmull_u32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( - wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), - wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(a_.u32) z = { 0, }; - a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); - b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * - HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 * b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] * b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i64 = a_.i64 % b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_pd(a, b); - #else - simde__m128d_private - r_, 
- a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] * b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_mul_su32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); - #else - r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mulhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a3210 = vget_low_s16(a_.neon_i16); - int16x4_t b3210 = vget_low_s16(b_.neon_i16); - int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); - #else - int16x4_t a7654 = vget_high_s16(a_.neon_i16); - int16x4_t b7654 = vget_high_s16(b_.neon_i16); - int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); - r_.neon_u16 = rv.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); - 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_mulhi_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t a3210 = vget_low_u16(a_.neon_u16); - uint16x4_t b3210 = vget_low_u16(b_.neon_u16); - uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); - r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - #else - uint16x4_t a7654 = vget_high_u16(a_.neon_u16); - uint16x4_t b7654 = vget_high_u16(b_.neon_u16); - uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - r_.neon_u16 = neon_r.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); - r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mullo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_or_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - 
r_.i32f = a_.i32f | b_.i32f; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_or_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi16(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; - const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; - - int16_t m SIMDE_VECTOR(32); - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); - v = (v & ~m) | (min & m); - - m = v > max; - v = (v & ~m) | (max & m); - - SIMDE_CONVERT_VECTOR_(r_.i8, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; - r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi32(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; - const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; - - int32_t m SIMDE_VECTOR(32); - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); - v = (v & ~m) | (min & m); - - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); - v = (v & ~m) | (max & m); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packus_epi16(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); - #else - r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = - vcombine_u8( - vqmovun_s16(a_.neon_i16), - vqmovun_s16(b_.neon_i16) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - - v &= ~(v >> 15); - v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); - - SIMDE_CONVERT_VECTOR_(r_.i8, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; - r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_pause (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_pause(); - #elif defined(SIMDE_ARCH_X86) - __asm__ __volatile__("pause"); - #elif defined(SIMDE_ARCH_ARM_NEON) - #if defined(_MSC_VER) - __isb(_ARM64_BARRIER_SY); - #else - __asm__ __volatile__("isb\n"); - #endif - #elif defined(SIMDE_ARCH_POWER) - __asm__ __volatile__ ("or 27,27,27" ::: "memory"); - #elif defined(SIMDE_ARCH_WASM) - __asm__ __volatile__ ("nop"); - #elif defined(HEDLEY_GCC_VERSION) - #if defined(SIMDE_ARCH_RISCV) - __builtin_riscv_pause(); - #else - __asm__ __volatile__ ("nop" ::: "memory"); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_pause() (simde_mm_pause()) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sad_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); - r_.neon_u64 = vcombine_u64( - vpaddl_u32(vpaddl_u16(vget_low_u16(t))), - vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - uint16_t tmp = 0; - SIMDE_VECTORIZE_REDUCTION(+:tmp) - for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { - const size_t e = j + (i * 8); - tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); - } - r_.i64[i] = tmp; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_make( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { - e0, e1, e2, e3, - e4, e5, e6, e7, - e8, e9, e10, e11, - e12, e13, e14, e15}; - r_.neon_i8 = vld1q_s8(data); - #else - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i16 = vld1q_s16(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - r_.i16[4] = e4; - r_.i16[5] = e5; - r_.i16[6] = e6; - r_.i16[7] = e7; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si16 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ - HEDLEY_GCC_VERSION_CHECK(12,1,0)) - return _mm_loadu_si16(mem_addr); - #else - int16_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_x_mm_cvtsi16_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi32(e3, e2, e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; - r_.neon_i32 = vld1q_s32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); - #else - r_.i32[0] = e0; - r_.i32[1] = e1; - r_.i32[2] = e2; - r_.i32[3] = e3; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si32 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ - HEDLEY_GCC_VERSION_CHECK(12,1,0)) - return _mm_loadu_si32(mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m128i_private r_; - r_.neon_i32 = vsetq_lane_s32(* HEDLEY_REINTERPRET_CAST(const int32_t *, mem_addr), vdupq_n_s32(0), 0); - return simde__m128i_from_private(r_); - #else - int32_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_mm_cvtsi32_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_epi64(e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); - #else - r_.m64[0] = e0; - r_.m64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi64x (int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set_epi64x(e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; - r_.neon_i64 = vld1q_s64(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make(e0, e1); - #else - r_.i64[0] = e0; - r_.i64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si64 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - return _mm_loadu_si64(mem_addr); - #else - int64_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_mm_cvtsi64_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, - uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, - uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi8( - HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), - HEDLEY_STATIC_CAST(char, e11), 
HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), - HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), - HEDLEY_STATIC_CAST(char, e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { - e0, e1, e2, e3, - e4, e5, e6, e7, - e8, e9, e10, e11, - e12, e13, e14, e15}; - r_.neon_u8 = vld1q_u8(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); - #else - r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; - r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; - r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; - r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, - uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi16( - HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), - HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u16 = vld1q_u16(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); - #else - r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; - r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi32( - HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; - r_.neon_u32 = vld1q_u32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); - #else - r_.u32[0] = e0; - r_.u32[1] = e1; - r_.u32[2] = e2; - r_.u32[3] = e3; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; - r_.neon_u64 = vld1q_u64(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_make(e0, e1); - #else - r_.u64[0] = e0; - r_.u64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_sd (simde_float64 a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_sd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); - #else - return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_sd(a) simde_mm_set_sd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi8(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vdupq_n_s8(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi16 (int16_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi16(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vdupq_n_s16(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi32 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi32(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vdupq_n_s32(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi64x (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set1_epi64x(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vdupq_n_s64(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_epi64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - return simde_mm_set1_epi64x(a_.i64[0]); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu8 (uint8_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); - #else - return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu16 (uint16_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); - #else - return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu32 (uint32_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); - #else - return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu64 (uint64_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); - #else - return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_epi8( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - 
#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi32(e3, e2, e1, e0); - #else - return simde_mm_set_epi32(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_epi64(e1, e0); - #else - return simde_mm_set_epi64(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_pd(e1, e0); - #else - return simde_mm_set_pd(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setzero_pd (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_pd(); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); - #else - return simde_mm_castsi128_pd(simde_mm_setzero_si128()); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_pd() simde_mm_setzero_pd() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_undefined_pd (void) { - simde__m128d_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_pd(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_pd() simde_mm_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_undefined_si128 (void) { - simde__m128i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_si128(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_si128() (simde_mm_undefined_si128()) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_setone_pd (void) { - return simde_mm_castps_pd(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_setone_si128 (void) { - return simde_mm_castps_si128(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = 
simde__m128i_to_private(a); \ - simde__m128i_from_wasm_v128( \ - wasm_i32x4_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3)); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_epi32(a, imm8) \ - (__extension__ ({ \ - const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ - int32x4_t simde_mm_shuffle_epi32_r_; \ - simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ - vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - (simde_tmp_a_).i32, \ - (simde_tmp_a_).i32, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; - r_.f64[1] = ((imm8 & 2) == 0) ? 
b_.f64[0] : b_.f64[1]; - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ - simde__m128d_from_private((simde__m128d_private) { .f64 = \ - SIMDE_SHUFFLE_VECTOR_(64, 16, \ - simde__m128d_to_private(a).f64, \ - simde__m128d_to_private(b).f64, \ - (((imm8) ) & 1), \ - (((imm8) >> 1) & 1) + 2) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[i]; - } - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflehi_epi16(a, imm8) \ - (__extension__ ({ \ - int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ - simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ - wasm_i16x8_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = 
simde__m128i_to_private(a); - - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; - } - SIMDE_VECTORIZE - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) \ - simde__m128i_from_wasm_v128( \ - wasm_i16x8_shuffle( \ - simde__m128i_to_wasm_v128((a)), \ - wasm_i16x8_splat(0), \ - (((imm8) & 0x03) ), \ - (((imm8) & 0x0c) >> 2), \ - (((imm8) & 0x30) >> 4), \ - (((imm8) & 0xc0) >> 6), \ - 4, 5, 6, 7)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflelo_epi16(a, imm8) \ - (__extension__({ \ - int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ - simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), \ - 4, 5, 6, 7) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 15) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = (a_.u16 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? 
wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 31) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = (a_.u32 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 63) - return simde_mm_setzero_si128(); - - const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] << s; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsqrtq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_sqrt(a_.altivec_f64); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_sqrt) - r_.f64[0] = simde_math_sqrt(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_movedup_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movedup_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - r_.f64[0] = a_.f64[0]; - r_.f64[1] = a_.f64[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehdup_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movehdup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); - #else - r_.f32[0] = a_.f32[1]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_moveldup_ps (simde__m128 a) { - #if defined(SIMDE__SSE3_NATIVE) - return _mm_moveldup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[0]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[2]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE3_H) */ -/* :: End simde/x86/sse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi8(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabsq_s8(a_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_abs(a_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / 
sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vabsq_s16(a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_abs(a_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); - return _mm_sub_epi32(_mm_xor_si128(a, m), m); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vabsq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_abs(a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_PUSH - #pragma warning(disable:4146) - #endif - r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_POP - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi8(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabs_s8(a_.neon_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? 
(- a_.i8[i]) : a_.i8[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi16 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi16(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vabs_s16(a_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi32 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi32(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vabs_s32(a_.neon_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? (- a_.i32[i]) : a_.i32[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - if (HEDLEY_UNLIKELY(count > 31)) - return simde_mm_setzero_si128(); - - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 31) { - r_.i8[i] = 0; - } else if (srcpos > 15) { - r_.i8[i] = a_.i8[(srcpos) & 15]; - } else { - r_.i8[i] = b_.i8[srcpos]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSSE3_NATIVE) - #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_alignr_epi8(a, b, count) \ - ( \ - ((count) > 31) \ - ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ - : ( \ - ((count) > 15) \ - ? 
(simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ - : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) -#endif -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) - #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) - SIMDE_REQUIRE_CONSTANT(count) { - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 15) { - r_.i8[i] = 0; - } else if (srcpos > 7) { - r_.i8[i] = a_.i8[(srcpos) & 7]; - } else { - r_.i8[i] = b_.i8[srcpos]; - } - } - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) -# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_alignr_pi8(a, b, count) \ - ( \ - ((count) > 15) \ - ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ - : ( \ - ((count) > 7) \ - ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ - : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) -#endif -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_shuffle_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Mask out the bits we're not interested in. vtbl will result in 0 - * for any values outside of [0, 15], so if the high bit is set it - * will return 0, just like in SSSE3. 
*/ - b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); - - /* Convert a from an int8x16_t to an int8x8x2_t */ - int8x8x2_t i; - i.val[0] = vget_low_s8(a_.neon_i8); - i.val[1] = vget_high_s8(a_.neon_i8); - - /* Table lookups */ - int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); - int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); - - r_.neon_i8 = vcombine_s8(l, h); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - /* This is a bit ugly because of the casts and the awful type - * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just - * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ - SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; - SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); - SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); - r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_swizzle( - a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); - } - #endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_shuffle_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); - r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); - #else - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadd_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); - #else - return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadd_epi32(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); - return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); - #else - return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadd_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); - #else - r_.i16[0] = a_.i16[0] + a_.i16[1]; - r_.i16[1] = a_.i16[2] + a_.i16[3]; - r_.i16[2] = b_.i16[0] + b_.i16[1]; - r_.i16[3] = b_.i16[2] + b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadd_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[0] + a_.i32[1]; - r_.i32[1] = b_.i32[0] + b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadds_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); - #else - return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadds_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); - #else - for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); - r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; - int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); - r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsub_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); - #else - return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsub_epi32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); - return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); - #else - return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsub_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); - #else - r_.i16[0] = a_.i16[0] - a_.i16[1]; - r_.i16[1] = a_.i16[2] - a_.i16[3]; - r_.i16[2] = b_.i16[0] - b_.i16[1]; - r_.i16[3] = b_.i16[2] - b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsub_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - - SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[0] - a_.i32[1]; - r_.i32[1] = b_.i32[0] - b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsubs_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); - #else - return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); - #else - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); - r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_maddubs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Zero extend a */ - int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); - int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); - - /* Sign extend by shifting left then shifting right. */ - int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); - int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); - - /* multiply */ - int16x8_t prod1 = vmulq_s16(a_even, b_even); - int16x8_t prod2 = vmulq_s16(a_odd, b_odd); - - /* saturated add */ - r_.neon_i16 = vqaddq_s16(prod1, prod2); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_maddubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); - int16x8_t bi = vmovl_s8(b_.neon_i8); - int16x8_t p = vmulq_s16(ai, bi); - int16x4_t l = vget_low_s16(p); - int16x4_t h = vget_high_s16(p); - r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_mulhrs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), - vget_low_s16(b_.neon_i16)); - int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), - vget_high_s16(b_.neon_i16)); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); - int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); - - /* Join together */ - r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); - v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); - const v128_t __inc = wasm_i32x4_splat(0x4000); - __lo = wasm_i32x4_add(__lo, __inc); - __hi = wasm_i32x4_add(__hi, __inc); - __lo = wasm_i32x4_add(__lo, __lo); - __hi = wasm_i32x4_add(__hi, __hi); - r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhrs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - 
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow = vrshrn_n_s32(mul, 15); - - /* Join together */ - r_.neon_i16 = narrow; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); - uint8x16_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s8(b_.neon_i8); - #else - bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); - #endif - bnz_mask = vmvnq_u8(bnz_mask); - - r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); - simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); - uint16x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s16(b_.neon_i16); - #else - bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); - #endif - bnz_mask = vmvnq_u16(bnz_mask); - - r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); - simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); - uint32x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s32(b_.neon_i32); - #else - bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); - #endif - bnz_mask = vmvnq_u32(bnz_mask); - - r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); - simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); - uint8x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s8(b_.neon_i8); - #else - bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); - #endif - bnz_mask = vmvn_u8(bnz_mask); - - r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); - uint16x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s16(b_.neon_i16); - #else - bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); - #endif - bnz_mask = vmvn_u16(bnz_mask); - - r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); - uint32x2_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s32(b_.neon_i32); - #else - bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); - #endif - bnz_mask = vmvn_u32(bnz_mask); - - r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/ssse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) -# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_epi16(a, b, imm8) \ - (__extension__ ({ \ - simde__m128i_private \ - simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ - simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ - simde_mm_blend_epi16_r_; \ - \ - simde_mm_blend_epi16_r_.i16 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 16, 16, \ - simde_mm_blend_epi16_a_.i16, \ - simde_mm_blend_epi16_b_.i16, \ - ((imm8) & (1 << 0)) ? 8 : 0, \ - ((imm8) & (1 << 1)) ? 9 : 1, \ - ((imm8) & (1 << 2)) ? 10 : 2, \ - ((imm8) & (1 << 3)) ? 11 : 3, \ - ((imm8) & (1 << 4)) ? 12 : 4, \ - ((imm8) & (1 << 5)) ? 13 : 5, \ - ((imm8) & (1 << 6)) ? 14 : 6, \ - ((imm8) & (1 << 7)) ? 15 : 7 \ - ); \ - \ - simde__m128i_from_private(simde_mm_blend_epi16_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_epi16 - #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; - } - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_pd(a, b, imm8) \ - (__extension__ ({ \ - simde__m128d_private \ - simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ - simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ - simde_mm_blend_pd_r_; \ - \ - simde_mm_blend_pd_r_.f64 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 64, 16, \ - simde_mm_blend_pd_a_.f64, \ - simde_mm_blend_pd_b_.f64, \ - ((imm8) & (1 << 0)) ? 2 : 0, \ - ((imm8) & (1 << 1)) ? 3 : 1 \ - ); \ - \ - simde__m128d_from_private(simde_mm_blend_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_pd - #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_ps(a, b, imm8) \ - (__extension__ ({ \ - simde__m128_private \ - simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ - simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ - simde_mm_blend_ps_r_; \ - \ - simde_mm_blend_ps_r_.f32 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 32, 16, \ - simde_mm_blend_ps_a_.f32, \ - simde_mm_blend_ps_b_.f32, \ - ((imm8) & (1 << 0)) ? 4 : 0, \ - ((imm8) & (1 << 1)) ? 5 : 1, \ - ((imm8) & (1 << 2)) ? 6 : 2, \ - ((imm8) & (1 << 3)) ? 
7 : 3 \ - ); \ - \ - simde__m128_from_private(simde_mm_blend_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_ps - #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_epi8(a, b, mask); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); - return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Use a signed shift right to create a mask with the sign bit */ - mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); - r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); - #else - mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; - #endif - - r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int8_t m = mask_.i8[i] >> 7; - r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_epi8 - #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE2_NATIVE) - mask = simde_mm_srai_epi16(mask, 15); - return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); - r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i16 = mask_.i16 < z; - #else - mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; - #endif - - r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int16_t m = mask_.i16[i] >> 15; - r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; - mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); - #else - mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; - #endif - - r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - int32_t m = mask_.i32[i] >> 31; - r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i64) z = { 0, 0 }; - mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); - #else - mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; - #endif - - r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - int64_t m = mask_.i64[i] >> 63; - r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_pd - #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_ps - #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_pd (simde__m128d a, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - /* For architectures which lack a current direction SIMD instruction. */ - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndiq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyint) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_nearbyint(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndaq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_roundeven) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_roundeven(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndmq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_floor(a_.f64[i]); - } - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndpq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); - #elif defined(simde_math_ceil) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_ceil(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_trunc(a_.f64[i]); - } - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_pd - #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_pd - #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ps - #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_sd - #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_ss(a, 
b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ss - #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cmpeq_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ - uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); - uint32x4_t swapped = vrev64q_u32(cmp); - r_.neon_u32 = vandq_u32(cmp, swapped); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpeq_epi64 - #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_i16 = s16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, 0, -1, 1, -1, 2, -1, 3, - -1, 4, -1, 5, -1, 6, -1, 7)); - r_.i16 >>= 8; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi16 - #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) 
- __m128i tmp = _mm_unpacklo_epi8(a, a); - tmp = _mm_unpacklo_epi16(tmp, tmp); - return _mm_srai_epi32(tmp, 24); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ - r_.neon_i32 = s32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, 0, -1, -1, -1, 1, - -1, -1, -1, 2, -1, -1, -1, 3)); - r_.i32 >>= 24; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi32 - #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); - r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - /* Disabled on x86 due to lack of 64-bit arithmetic shift until - * until AVX-512 (at which point we would be using the native - * _mm_cvtepi_epi64 anyways). 
*/ - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, -1, -1, -1, -1, 0, - -1, -1, -1, -1, -1, -1, -1, 1)); - r_.i64 >>= 56; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi64 - #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_u16 = u16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 16, 1, 17, 2, 18, 3, 19, - 4, 20, 5, 21, 6, 22, 7, 23)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi16 - #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi32(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ - r_.neon_u32 = u32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 
18, 19, 1, 21, 22, 23, - 2, 25, 26, 27, 3, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi32 - #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi64(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 18, 19, 20, 21, 22, 23, - 1, 25, 26, 27, 28, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi64 - #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); - r_.i32 >>= 16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi32 - #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 1, 11, 2, 13, 3, 15)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi32 - #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 10, 11, - 1, 13, 14, 15)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi64 - #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, - 8, 9, 10, 0, - 12, 13, 14, 1)); - r_.i64 >>= 48; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi64 - #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i tmp = _mm_shuffle_epi32(a, 0x50); - tmp = _mm_srai_epi32(tmp, 31); - tmp = _mm_shuffle_epi32(tmp, 0xed); - return _mm_unpacklo_epi32(a, tmp); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); - r_.i64 >>= 32; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi32_epi64 - #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); - #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u32) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu32_epi64 - #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - - switch (imm8) { - case 0xff: - r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); - break; - case 0x13: - r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); - break; - default: - { /* imm8 is a compile-time constant, so this all becomes just a load */ - uint64_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - - r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); - - { - uint64_t mask_data[] = { - (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - break; - } - #else - simde_float64 sum = SIMDE_FLOAT64_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; - } - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_pd - #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - - switch (imm8) { - case 0xff: - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - case 0x7f: - r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - default: - { - { - uint32_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - - { - uint32_t mask_data[] = { - (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - } - break; - } - #else - simde_float32 sum = SIMDE_FLOAT32_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); - } - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(HEDLEY_MCST_LCC_VERSION) - #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ - SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ - _mm_dp_ps((a), (b), (imm8)); \ - SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ - })) - #else - #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_ps - #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) -#endif - -#if defined(simde_mm_extract_epi8) -# undef simde_mm_extract_epi8 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_mm_extract_epi8 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i8, imm8); - #else - return a_.i8[imm8 & 15]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) -# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi8 - #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) -#endif - -#if defined(simde_mm_extract_epi32) -# undef simde_mm_extract_epi32 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i32, imm8); - #else - return a_.i32[imm8 & 3]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi32 - #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) -#endif - -#if defined(simde_mm_extract_epi64) -# undef simde_mm_extract_epi64 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_extract_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i64, imm8); - #else - return a_.i64[imm8 & 1]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) -#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_extract_epi64 - #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) -#endif - -#if defined(simde_mm_extract_ps) -# undef simde_mm_extract_ps -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128_private - a_ = simde__m128_to_private(a); - - return a_.i32[imm8 & 3]; -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_ps - #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_pd - #define _mm_floor_pd(a) simde_mm_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ps - #define _mm_floor_ps(a) simde_mm_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_floor) - r_.f64[0] = simde_math_floor(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_sd - #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_floor_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_floorf) - r_.f32[0] = simde_math_floorf(b_.f32[0]); - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ss - #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - /* clang-3.8 returns an incompatible type, so we need the cast. MSVC - * can't handle the cast ("error C2440: 'type cast': cannot convert - * from '__m128i' to '__m128i'"). */ - #if defined(__clang__) - #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) - #else - #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi8 - #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(__clang__) - #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) - #else - #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi32 - #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - #if defined(SIMDE_BUG_GCC_94482) - simde__m128i_private - a_ = simde__m128i_to_private(a); - - switch(imm8) { - case 0: - return simde_mm_set_epi64x(a_.i64[1], i); - break; - case 1: - return simde_mm_set_epi64x(i, a_.i64[0]); - break; - default: - HEDLEY_UNREACHABLE(); - break; - } - #else - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i64[imm8] = i; - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_insert_epi64 - #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - float tmp1_ = b_.f32[(imm8 >> 6) & 3]; - a_.f32[(imm8 >> 4) & 3] = tmp1_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_ps - #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi8(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi8 - #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi32(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi32 - #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_add_epi16(b, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu16 - #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu32 - #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi8 - #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi32 - #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu16 - #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu32 - #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_minpos_epu16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_minpos_epu16(a); - #else - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()), - a_ = simde__m128i_to_private(a); - - r_.u16[0] = UINT16_MAX; - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - if (a_.u16[i] < r_.u16[0]) { - r_.u16[0] = a_.u16[i]; - r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); - } - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_minpos_epu16 - #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - const int a_offset = imm8 & 4; - const int b_offset = (imm8 & 3) << 2; - -#if defined(simde_math_abs) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { - r_.u16[i] = - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) -# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mpsadbw_epu8 - #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mul_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // vmull_s32 upcasts instead of masking, so we downcast. 
- int32x2_t a_lo = vmovn_s64(a_.neon_i64); - int32x2_t b_lo = vmovn_s64(b_.neon_i64); - r_.neon_i64 = vmull_s32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make( - wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), - wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = - HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * - HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mul_epi32 - #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mullo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mullo_epi32 - #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 * b_.u32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] * b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_packus_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i max = _mm_set1_epi32(UINT16_MAX); - const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); - const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); - return - _mm_packs_epi32( - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) - ); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); - #else - r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = - 
vcombine_u16( - vqmovun_s32(a_.neon_i32), - vqmovun_s32(b_.neon_i32) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - - v &= ~(v >> 31); - v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_packus_epi32 - #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyint) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f64[0] = simde_math_nearbyint(b_.f64[0]); - break; - #endif - - #if defined(simde_math_floor) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f64[0] = simde_math_floor(b_.f64[0]); - break; - #endif - - #if defined(simde_math_ceil) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f64[0] = simde_math_ceil(b_.f64[0]); - break; - #endif - - #if defined(simde_math_trunc) - case SIMDE_MM_FROUND_TO_ZERO: - r_.f64[0] = simde_math_trunc(b_.f64[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) -# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_sd - #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128_private - r_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyintf) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_floorf) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f32[0] = simde_math_floorf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_ceilf) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f32[0] = simde_math_ceilf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_truncf) - case SIMDE_MM_FROUND_TO_ZERO: - 
r_.f32[0] = simde_math_truncf(b_.f32[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_ss - #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_load) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - return __builtin_nontemporal_load(mem_addr); - #else - return simde_mm_load_si128(mem_addr); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_stream_load_si128 - #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_ones (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_ones(a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; - #else - int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(&:r_) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r_ &= a_.i32f[i]; - } - - r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_ones - #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_zeros(a, mask); - #else - simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; - #else - int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(|:r_) - for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { - r_ |= tmp_.i32f[i]; - } - - r = !r_; - #endif - - return r; - #endif -} 
-#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_zeros - #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_mix_ones_zeros(a, mask); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); - int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); - return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); - long long c0 = wasm_i64x2_extract_lane(m, 0); - long long c1 = wasm_i64x2_extract_lane(m, 1); - long long ones = c0 | c1; - long long zeros = ~(c0 & c1); - return ones && zeros; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) - if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) - return 1; - - return 0; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_mix_ones_zeros - #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #else - int_fast32_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= ~a_.i32f[i] & b_.i32f[i]; - } - - return HEDLEY_STATIC_CAST(int, !r); - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_si128 - #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testnzc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); - int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !( !(vgetq_lane_s64(s641, 0) || vgetq_lane_s64(s641, 1)) \ - || !(vgetq_lane_s64(s640, 0) || vgetq_lane_s64(s640, 1)) ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ - && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) - return 1; - } - - return 0; - #endif - #endif -} -#if 
defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_si128 - #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testz_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #elif defined(SIMDE_HAVE_INT128_) - if ((a_.u128[0] & b_.u128[0]) == 0) { - return 1; - } - return 0; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if ((a_.u64[i] & b_.u64[i]) > 0) - return 0; - } - #endif - - return 1; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_si128 - #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_1_H) */ -/* :: End simde/x86/sse4.1.h :: */ - -#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS - #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS - #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS - #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS - #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY - #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES - #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH - #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED - #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY - #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY - #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT - #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT - #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK - #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK -#else - #define SIMDE_SIDD_UBYTE_OPS 0x00 - #define SIMDE_SIDD_UWORD_OPS 0x01 - #define SIMDE_SIDD_SBYTE_OPS 0x02 - #define SIMDE_SIDD_SWORD_OPS 0x03 - #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 - #define SIMDE_SIDD_CMP_RANGES 0x04 - #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 - #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c - #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 - #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 - #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 - #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 - #define SIMDE_SIDD_BIT_MASK 0x00 - #define SIMDE_SIDD_UNIT_MASK 0x40 -#endif - -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) - #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS - #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS - #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS - #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS - #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY - #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES - #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH - #define 
_SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED - #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY - #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY - #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY - #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY - #define _SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT - #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT - #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK - #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ - _mm_cmpestrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrs - #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 
16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ - _mm_cmpestrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrz - #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_cmpgt_epi64(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://stackoverflow.com/a/65175746/501126 */ - __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); - r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); - return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://stackoverflow.com/a/65223269/501126 */ - r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpgt_epi64 - #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_8_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 8) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i8[i]) - a_invalid = 1; - } - return a_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_16_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 16) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i16[i]) - a_invalid = 1; - } - return a_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrs(a, b, imm8) \ - _mm_cmpistrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrs(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? 
simde_mm_cmpistrs_16_((a)) \ - : simde_mm_cmpistrs_8_((a))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrs - #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_8_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 8) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i8[i]) - b_invalid = 1; - } - return b_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_16_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 16) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i16[i]) - b_invalid = 1; - } - return b_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrz(a, b, imm8) \ - _mm_cmpistrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrz(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? simde_mm_cmpistrz_16_((b)) \ - : simde_mm_cmpistrz_8_((b))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrz - #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u8(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cb(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc ^= v; - for(int bit = 0 ; bit < 8 ; bit++) { - if (crc & 1) - crc = (crc >> 1) ^ UINT32_C(0x82f63b78); - else - crc = (crc >> 1); - } - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u16(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32ch(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u8(crc, v & 0xff); - crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u32(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cw(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u16(crc, v & 0xffff); - crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) - return _mm_crc32_u64(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return 
__crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); - #else - uint64_t crc = prevcrc; - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_2_H) */ -/* :: End simde/x86/sse4.2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; - SIMDE_ALIGN_TO_32 simde__m128 m128[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256 n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} 
simde__m256_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; - SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256d n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256d_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 
SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_32 simde_float16 f16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 simde_float16 f16[16]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float16 f16[16]; - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; - SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256i n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256i_private; - -#if defined(SIMDE_X86_AVX_NATIVE) - typedef __m256 simde__m256; - typedef __m256i simde__m256i; - typedef __m256d simde__m256d; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; -#else - typedef simde__m256_private simde__m256; - typedef simde__m256i_private simde__m256i; - typedef simde__m256d_private simde__m256d; -#endif - -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) - typedef simde__m256 __m256; - typedef simde__m256i __m256i; - typedef simde__m256d __m256d; - #else - #undef __m256 - #define __m256 simde__m256 - #undef __m256i - #define __m256i simde__m256i - #undef __m256d - #define __m256d simde__m256d - #endif -#endif - -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); 
-HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde__m256_from_private(simde__m256_private v) { - simde__m256 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256_private -simde__m256_to_private(simde__m256 v) { - simde__m256_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde__m256i_from_private(simde__m256i_private v) { - simde__m256i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i_private -simde__m256i_to_private(simde__m256i v) { - simde__m256i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde__m256d_from_private(simde__m256d_private v) { - simde__m256d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d_private -simde__m256d_to_private(simde__m256d v) { - simde__m256d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_CMP_EQ_OQ 0 -#define SIMDE_CMP_LT_OS 1 -#define SIMDE_CMP_LE_OS 2 -#define SIMDE_CMP_UNORD_Q 3 -#define SIMDE_CMP_NEQ_UQ 4 -#define SIMDE_CMP_NLT_US 5 -#define SIMDE_CMP_NLE_US 6 -#define SIMDE_CMP_ORD_Q 7 -#define SIMDE_CMP_EQ_UQ 8 -#define SIMDE_CMP_NGE_US 9 -#define SIMDE_CMP_NGT_US 10 -#define SIMDE_CMP_FALSE_OQ 11 -#define SIMDE_CMP_NEQ_OQ 12 -#define SIMDE_CMP_GE_OS 13 -#define SIMDE_CMP_GT_OS 14 -#define SIMDE_CMP_TRUE_UQ 15 -#define SIMDE_CMP_EQ_OS 16 -#define SIMDE_CMP_LT_OQ 17 -#define SIMDE_CMP_LE_OQ 18 -#define SIMDE_CMP_UNORD_S 19 -#define SIMDE_CMP_NEQ_US 20 -#define SIMDE_CMP_NLT_UQ 21 -#define SIMDE_CMP_NLE_UQ 22 -#define SIMDE_CMP_ORD_S 23 -#define SIMDE_CMP_EQ_US 24 -#define SIMDE_CMP_NGE_UQ 25 -#define SIMDE_CMP_NGT_UQ 26 -#define SIMDE_CMP_FALSE_OS 27 -#define SIMDE_CMP_NEQ_OS 28 -#define SIMDE_CMP_GE_OQ 29 -#define SIMDE_CMP_GT_OQ 30 -#define SIMDE_CMP_TRUE_US 31 - -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) -#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ -#define _CMP_LT_OS SIMDE_CMP_LT_OS -#define _CMP_LE_OS SIMDE_CMP_LE_OS -#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q -#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ -#define _CMP_NLT_US SIMDE_CMP_NLT_US -#define _CMP_NLE_US SIMDE_CMP_NLE_US -#define _CMP_ORD_Q SIMDE_CMP_ORD_Q -#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ -#define _CMP_NGE_US SIMDE_CMP_NGE_US -#define _CMP_NGT_US SIMDE_CMP_NGT_US -#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ -#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ -#define _CMP_GE_OS SIMDE_CMP_GE_OS -#define _CMP_GT_OS SIMDE_CMP_GT_OS -#define _CMP_TRUE_UQ 
SIMDE_CMP_TRUE_UQ -#define _CMP_EQ_OS SIMDE_CMP_EQ_OS -#define _CMP_LT_OQ SIMDE_CMP_LT_OQ -#define _CMP_LE_OQ SIMDE_CMP_LE_OQ -#define _CMP_UNORD_S SIMDE_CMP_UNORD_S -#define _CMP_NEQ_US SIMDE_CMP_NEQ_US -#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ -#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ -#define _CMP_ORD_S SIMDE_CMP_ORD_S -#define _CMP_EQ_US SIMDE_CMP_EQ_US -#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ -#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ -#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS -#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS -#define _CMP_GE_OQ SIMDE_CMP_GE_OQ -#define _CMP_GT_OQ SIMDE_CMP_GT_OQ -#define _CMP_TRUE_US SIMDE_CMP_TRUE_US -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castps_pd (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps_pd(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps_pd - #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castps_si256 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps_si256(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps_si256 - #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castsi256_pd (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_pd(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_pd - #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castsi256_ps (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_ps(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_ps - #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castpd_ps (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd_ps(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd_ps - #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castpd_si256 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd_si256(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd_si256 - #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setzero_si256 (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_si256(); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_setzero_si128(); - r_.m128i[1] = simde_mm_setzero_si128(); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = 0; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_si256 - #define _mm256_setzero_si256() simde_mm256_setzero_si256() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 
-simde_mm256_setzero_ps (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_ps(); - #else - return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_ps - #define _mm256_setzero_ps() simde_mm256_setzero_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setzero_pd (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_pd(); - #else - return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_pd - #define _mm256_setzero_pd() simde_mm256_setzero_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_not_ps(simde__m256 a) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); - r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm256_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_ps(a, b, mask); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - mask_ = simde__m256_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); - r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_not_pd(simde__m256d a) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = ~a_.i64; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); - r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ~(a_.i64[i]); - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm256_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. 
- * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_pd(a, b, mask); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - mask_ = simde__m256d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); - r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_setone_si256 (void) { - simde__m256i_private r_; - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - __typeof__(r_.i32f) rv = { 0, }; - r_.i32f = ~rv; -#elif defined(SIMDE_X86_AVX2_NATIVE) - __m256i t = _mm256_setzero_si256(); - r_.n = _mm256_cmpeq_epi32(t, t); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - } -#endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_setone_ps (void) { - return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_setone_pd (void) { - return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, - int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, - int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi8( - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16); - #else - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - r_.i8[16] = e16; - r_.i8[17] = e17; - r_.i8[18] = e18; - r_.i8[19] = e19; - r_.i8[20] = e20; - r_.i8[21] = e21; - r_.i8[22] = e22; - r_.i8[23] = e23; - r_.i8[24] = e24; - r_.i8[25] = e25; - r_.i8[26] = e26; - r_.i8[27] = e27; - r_.i8[28] = e28; - r_.i8[29] = e29; - r_.i8[30] = e30; - r_.i8[31] = e31; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi8 - #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ 
- simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, - int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); - #else - r_.i16[ 0] = e0; - r_.i16[ 1] = e1; - r_.i16[ 2] = e2; - r_.i16[ 3] = e3; - r_.i16[ 4] = e4; - r_.i16[ 5] = e5; - r_.i16[ 6] = e6; - r_.i16[ 7] = e7; - r_.i16[ 8] = e8; - r_.i16[ 9] = e9; - r_.i16[10] = e10; - r_.i16[11] = e11; - r_.i16[12] = e12; - r_.i16[13] = e13; - r_.i16[14] = e14; - r_.i16[15] = e15; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi16 - #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, - int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); - #else - r_.i32[ 0] = e0; - r_.i32[ 1] = e1; - r_.i32[ 2] = e2; - r_.i32[ 3] = e3; - r_.i32[ 4] = e4; - r_.i32[ 5] = e5; - r_.i32[ 6] = e6; - r_.i32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi32 - #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi64x(e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi64x(e1, e0); - r_.m128i[1] = simde_mm_set_epi64x(e3, e2); - #else - r_.i64[0] = e0; - r_.i64[1] = e1; - r_.i64[2] = e2; - r_.i64[3] = e3; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi64x - #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, - uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, - uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, - uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, - uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, - uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, - uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m256i_private r_; - - r_.u8[ 0] = e0; - 
r_.u8[ 1] = e1; - r_.u8[ 2] = e2; - r_.u8[ 3] = e3; - r_.u8[ 4] = e4; - r_.u8[ 5] = e5; - r_.u8[ 6] = e6; - r_.u8[ 7] = e7; - r_.u8[ 8] = e8; - r_.u8[ 9] = e9; - r_.u8[10] = e10; - r_.u8[11] = e11; - r_.u8[12] = e12; - r_.u8[13] = e13; - r_.u8[14] = e14; - r_.u8[15] = e15; - r_.u8[16] = e16; - r_.u8[17] = e17; - r_.u8[18] = e18; - r_.u8[19] = e19; - r_.u8[20] = e20; - r_.u8[20] = e20; - r_.u8[21] = e21; - r_.u8[22] = e22; - r_.u8[23] = e23; - r_.u8[24] = e24; - r_.u8[25] = e25; - r_.u8[26] = e26; - r_.u8[27] = e27; - r_.u8[28] = e28; - r_.u8[29] = e29; - r_.u8[30] = e30; - r_.u8[31] = e31; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, - uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, - uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, - uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m256i_private r_; - - r_.u16[ 0] = e0; - r_.u16[ 1] = e1; - r_.u16[ 2] = e2; - r_.u16[ 3] = e3; - r_.u16[ 4] = e4; - r_.u16[ 5] = e5; - r_.u16[ 6] = e6; - r_.u16[ 7] = e7; - r_.u16[ 8] = e8; - r_.u16[ 9] = e9; - r_.u16[10] = e10; - r_.u16[11] = e11; - r_.u16[12] = e12; - r_.u16[13] = e13; - r_.u16[14] = e14; - r_.u16[15] = e15; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, - uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), - HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); - #else - r_.u32[ 0] = e0; - r_.u32[ 1] = e1; - r_.u32[ 2] = e2; - r_.u32[ 3] = e3; - r_.u32[ 4] = e4; - r_.u32[ 5] = e5; - r_.u32[ 6] = e6; - r_.u32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { - simde__m256i_private r_; - - r_.u64[0] = e0; - r_.u64[1] = e1; - r_.u64[2] = e2; - r_.u64[3] = e3; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); - r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - r_.f32[4] = e4; - r_.f32[5] = e5; - r_.f32[6] = e6; - r_.f32[7] = e7; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_ps - #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ - 
simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_pd(e3, e2, e1, e0); - #else - simde__m256d_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_set_pd(e1, e0); - r_.m128d[1] = simde_mm_set_pd(e3, e2); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - r_.f64[2] = e2; - r_.f64[3] = e3; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_pd - #define _mm256_set_pd(e3, e2, e1, e0) \ - simde_mm256_set_pd(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); - #else - simde__m256_private r_; - simde__m128_private - e1_ = simde__m128_to_private(e1), - e0_ = simde__m128_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128_private[0] = e0_; - r_.m128_private[1] = e1_; - #elif defined(SIMDE_HAVE_INT128_) - r_.i128[0] = e0_.i128[0]; - r_.i128[1] = e1_.i128[0]; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128 - #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); - #else - simde__m256d_private r_; - simde__m128d_private - e1_ = simde__m128d_to_private(e1), - e0_ = simde__m128d_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d_private[0] = e0_; - r_.m128d_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128d - #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); - #else - simde__m256i_private r_; - simde__m128i_private - e1_ = simde__m128i_to_private(e1), - e0_ = simde__m128i_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i_private[0] = e0_; - r_.m128i_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128i - #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi8(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi8(a); - r_.m128i[1] = simde_mm_set1_epi8(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - #endif - - return 
[... vendored SIMDe AVX compatibility header removed wholesale in this commit as part of dropping the bundled simde dependency; the deleted span consists only of SIMDe's portable software fallbacks for the `_mm256_*` / `_mm_*` AVX intrinsics (set1, deinterleave/hadd, addsub, and/andnot, blend/blendv, broadcast, cast, round/ceil/floor, cmp, copysign, cvt*, div, extractf128, insert) and is not reproduced here ...]
(simde__m256i a, int8_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 31) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i8[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi8 - #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 15) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i16[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi16 - #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 7) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i32[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi32 - #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 3) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i64[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm256_insert_epi64 - #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256d_private a_ = simde__m256d_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - a_.m128d_private[imm8] = b_; - - return simde__m256d_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_pd - #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256_private a_ = simde__m256_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - 
a_.m128_private[imm8] = b_; - - return simde__m256_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_ps - #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - a_.m128i_private[imm8] = b_; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_si256 - #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) -#else -# define simde_mm256_dp_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ - simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_dp_ps - #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm256_extract_epi32 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 7) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i32[index]; -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi32 - #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm256_extract_epi64 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 3) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i64[index]; -} -#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) - #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) - #endif -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm256_extract_epi64 - #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_lddqu_si256 - #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_pd(mem_addr); - #else - simde__m256d r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); - 
return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_pd - #define _mm256_load_pd(a) simde_mm256_load_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_ps(mem_addr); - #else - simde__m256 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_ps - #define _mm256_load_ps(a) simde_mm256_load_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_load_si256 (simde__m256i const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_si256(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_si256 - #define _mm256_load_si256(a) simde_mm256_load_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_pd(a); - #else - simde__m256d r; - simde_memcpy(&r, a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_pd - #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_ps(a); - #else - simde__m256 r; - simde_memcpy(&r, a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_ps - #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi8 - #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) -#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi16 - #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi32(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi32 - #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi64 - #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_si256 (void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_si256 - #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - return _mm256_loadu2_m128(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), - simde_mm_loadu_ps(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128 - #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
- return _mm256_loadu2_m128d(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), - simde_mm_loadu_pd(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128d - #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - return _mm256_loadu2_m128i(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), - simde_mm_loadu_si128(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128i - #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); - #else - return _mm_maskload_pd(mem_addr, mask); - #endif - #else - simde__m128d_private r_; - simde__m128i_private - mask_ = simde__m128i_to_private(mask), - mask_shr_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde_mm_and_pd(simde_mm_load_pd(mem_addr), - simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { - mask_shr_.i64[i] = mask_.i64[i] >> 63; - } - #endif - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskload_pd - #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); - #else - return _mm256_maskload_pd(mem_addr, mask); - #endif - #else - simde__m256d_private r_; - simde__m256i_private mask_ = simde__m256i_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskload_pd - #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); - #else - return _mm_maskload_ps(mem_addr, mask); - #endif - #else - simde__m128_private r_; - simde__m128i_private - mask_ = simde__m128i_to_private(mask), - mask_shr_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde_mm_and_ps(simde_mm_load_ps(mem_addr), - simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { - mask_shr_.i32[i] = mask_.i32[i] >> 31; - } - #endif - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskload_ps - #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); - #else - return _mm256_maskload_ps(mem_addr, mask); - #endif - #else - simde__m256_private r_; - simde__m256i_private mask_ = simde__m256i_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskload_ps - #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); - #else - _mm_maskstore_pd(mem_addr, mask, a); - #endif - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) - mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) - mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if (mask_.u64[i] >> 63) - mem_addr[i] = a_.f64[i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_pd - #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); - #else - _mm256_maskstore_pd(mem_addr, mask, a); - #endif - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256d_private a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if (mask_.u64[i] & (UINT64_C(1) << 63)) - mem_addr[i] = a_.f64[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_pd - #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); - #else - _mm_maskstore_ps(mem_addr, mask, a); - #endif - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) - mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) - mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) - mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) - mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.f32[i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_ps - #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); - #else - _mm256_maskstore_ps(mem_addr, mask, a); - #endif - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256_private a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.f32[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_ps - #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_min_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_min_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_ps - #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_min_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_min_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_pd - #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_max_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_max_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_ps - #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_max_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_max_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_pd - #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_movedup_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movedup_pd(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movedup_pd - #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_movehdup_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movehdup_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movehdup_ps - #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_moveldup_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_moveldup_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); - #else - SIMDE_VECTORIZE 
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_moveldup_ps - #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_ps(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r |= (a_.u32[i] >> 31) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_ps - #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_pd(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_pd - #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_ps - #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_pd - #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_or_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; 
- #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_ps - #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_or_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] | b_.u64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_pd - #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute_ps (simde__m256 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_ps - #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_pd - #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permute_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permute_ps - #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permute_pd (simde__m128d a, const int imm8) - 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permute_pd - #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_permutevar_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make( - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[b_.i32[i] & 3]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permutevar_ps - #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_permutevar_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make( - (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), - (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permutevar_pd - #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_permutevar_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - simde__m256i_private b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar_ps - #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_permutevar_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - simde__m256i_private b_ = 
simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar_pd - #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); - r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_ps - #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); - r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_pd - #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); - r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_si256 - #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_rcp_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rcp_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); - r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_rcp_ps - #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_rsqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rsqrt_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_rsqrt_ps - #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi8 ( - int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi8( - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi8( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15, - e16, e17, e18, e19, e20, e21, e22, e23, - e24, e25, e26, e27, e28, e29, e30, e31); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi8 - #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi16 ( - int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi16( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi16( - e0, e1, e2, e3, e4, e5, e6, e7, - 
e8, e9, e10, e11, e12, e13, e14, e15); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi16 - #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi32 ( - int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi32 - #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi64x(e3, e2, e1, e0); - #else - return simde_mm256_set_epi64x(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi64x - #define _mm256_setr_epi64x(e3, e2, e1, e0) \ - simde_mm256_setr_epi64x(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_setr_ps ( - simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_ps - #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_pd(e3, e2, e1, e0); - #else - return simde_mm256_set_pd(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_pd - #define _mm256_setr_pd(e3, e2, e1, e0) \ - simde_mm256_setr_pd(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return _mm256_setr_m128(lo, hi); - #else - return simde_mm256_set_m128(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128 - #define _mm256_setr_m128(lo, hi) \ - simde_mm256_setr_m128(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return _mm256_setr_m128d(lo, hi); - #else - return simde_mm256_set_m128d(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128d - #define _mm256_setr_m128d(lo, hi) \ - simde_mm256_setr_m128d(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return 
_mm256_setr_m128i(lo, hi); - #else - return simde_mm256_set_m128i(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128i - #define _mm256_setr_m128i(lo, hi) \ - simde_mm256_setr_m128i(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; - r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; - r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; - r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; - r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_shuffle_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ - simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm256_shuffle_ps(a, b, imm8) \ - SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ - (((imm8) >> 0) & 3) + 0, \ - (((imm8) >> 2) & 3) + 0, \ - (((imm8) >> 4) & 3) + 8, \ - (((imm8) >> 6) & 3) + 8, \ - (((imm8) >> 0) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 12, \ - (((imm8) >> 6) & 3) + 12) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_ps - #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - r_.f64[0] = a_.f64[((imm8 ) & 1) ]; - r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; - r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; - r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_shuffle_pd(a, b, imm8) \ - simde_mm256_set_m128d( \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 2) & 3), \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 0) & 3)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm256_shuffle_pd(a, b, imm8) \ - SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ - (((imm8) >> 0) & 1) + 0, \ - (((imm8) >> 1) & 1) + 4, \ - (((imm8) >> 2) & 1) + 2, \ - (((imm8) >> 3) & 1) + 6) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_pd - #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sqrt_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); - r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sqrt_ps - #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sqrt_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sqrt_pd(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); - r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sqrt_pd - #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_ps(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_ps - #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_pd(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_pd - #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_si256(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_si256 - #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_ps(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_ps - #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_pd(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_pd - #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { - #if 
defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_si256 - #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128(hi_addr, lo_addr, a); - #else - simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); - simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128 - #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128d(hi_addr, lo_addr, a); - #else - simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); - simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128d - #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128i(hi_addr, lo_addr, a); - #else - simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); - simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128i - #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_ps - #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_pd - #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_si256(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_si256 - #define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_ps - #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_ps(a, b); - #else - return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_ps - #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_pd - #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_pd(a, b); - #else - return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_pd - #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_undefined_ps (void) { - simde__m256_private r_; - -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || 
HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_ps(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256_to_private(simde_mm256_setzero_ps()); -#endif - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_ps - #define _mm256_undefined_ps() simde_mm256_undefined_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_undefined_pd (void) { - simde__m256d_private r_; - -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_pd(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); -#endif - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_pd - #define _mm256_undefined_pd() simde_mm256_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_undefined_si256 (void) { - simde__m256i_private r_; -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_si256(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_si256 - #define _mm256_undefined_si256() simde_mm256_undefined_si256() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_xor_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_ps - #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_xor_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] ^ b_.u64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_pd - #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_xorsign_ps(simde__m256 dest, 
simde__m256 src) { - return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { - return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_negate_ps(simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_negate_pd(simde__m256d a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpackhi_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - r_.f32[4] = a_.f32[6]; - r_.f32[5] = b_.f32[6]; - r_.f32[6] = a_.f32[7]; - r_.f32[7] = b_.f32[7]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_ps - #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpackhi_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); - #else - r_.f64[0] = a_.f64[1]; - r_.f64[1] = b_.f64[1]; - r_.f64[2] = a_.f64[3]; - r_.f64[3] = b_.f64[3]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_pd - #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpacklo_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - r_.f32[4] = a_.f32[4]; - r_.f32[5] = b_.f32[4]; - r_.f32[6] = a_.f32[5]; - r_.f32[7] = b_.f32[5]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_ps - #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpacklo_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); - #else - r_.f64[0] = a_.f64[0]; - r_.f64[1] = b_.f64[0]; - r_.f64[2] = a_.f64[2]; - r_.f64[3] = b_.f64[2]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_pd - #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_zextps128_ps256 (simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); - #else - simde__m256_private r_; - - r_.m128_private[0] = simde__m128_to_private(a); - r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_zextps128_ps256 - #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_zextpd128_pd256 (simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); - #else - simde__m256d_private r_; - - r_.m128d_private[0] = simde__m128d_to_private(a); - r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_zextpd128_pd256 - #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_zextsi128_si256 (simde__m128i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); - #else - simde__m256i_private r_; - - r_.m128i_private[0] = simde__m128i_to_private(a); - r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_zextsi128_si256 - #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testc_ps(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); - m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); - m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - return wasm_i32x4_extract_lane(m, 0); - #else - uint_fast32_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= ~a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_ps - #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_pd (simde__m128d a, simde__m128d b) { - 
#if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testc_pd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); - return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); - #else - uint_fast64_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= ~a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_pd - #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testc_ps(a, b); - #else - uint_fast32_t r = 0; - simde__m256_private - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= ~a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testc_ps - #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testc_pd(a, b); - #else - uint_fast64_t r = 0; - simde__m256d_private - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= ~a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testc_pd - #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testc_si256(a, b); - #else - int_fast32_t r = 0; - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= ~a_.i32f[i] & b_.i32f[i]; - } - - return HEDLEY_STATIC_CAST(int, !r); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testc_si256 - #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testz_ps(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); - m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); - m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - return wasm_i32x4_extract_lane(m, 0); - #else - uint_fast32_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_ps - #define 
_mm_testz_ps(a, b) simde_mm_testz_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testz_pd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); - return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); - #else - uint_fast64_t r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_pd - #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testz_ps(a, b); - #else - uint_fast32_t r = 0; - simde__m256_private - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r |= a_.u32[i] & b_.u32[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testz_ps - #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testz_pd(a, b); - #else - uint_fast64_t r = 0; - simde__m256d_private - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= a_.u64[i] & b_.u64[i]; - } - - return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testz_pd - #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testz_si256(a, b); - #else - int_fast32_t r = 0; - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= a_.i32f[i] & b_.i32f[i]; - } - - r = !r; - #endif - - return HEDLEY_STATIC_CAST(int, r); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testz_si256 - #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testnzc_ps(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); - v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); - m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); - m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); - m = 
wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); - return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); - #else - uint32_t rz = 0, rc = 0; - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - rc |= ~a_.u32[i] & b_.u32[i]; - rz |= a_.u32[i] & b_.u32[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_ps - #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_testnzc_pd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); - v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); - return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) - & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); - #else - uint64_t rc = 0, rz = 0; - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - rc |= ~a_.u64[i] & b_.u64[i]; - rz |= a_.u64[i] & b_.u64[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_pd - #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testnzc_ps(a, b); - #else - uint32_t rc = 0, rz = 0; - simde__m256_private - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - rc |= ~a_.u32[i] & b_.u32[i]; - rz |= a_.u32[i] & b_.u32[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testnzc_ps - #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testnzc_pd(a, b); - #else - uint64_t rc = 0, rz = 0; - simde__m256d_private - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - rc |= ~a_.u64[i] & b_.u64[i]; - rz |= a_.u64[i] & b_.u64[i]; - } - - return - (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & - (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testnzc_pd - #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_testnzc_si256(a, b); - #else - int32_t rc = 0, rz = 0; - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - rc |= ~a_.i32f[i] & b_.i32f[i]; - rz |= a_.i32f[i] & b_.i32f[i]; - } - - return !!(rc & rz); - #endif 
-} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_testnzc_si256 - #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX_H) */ -/* :: End simde/x86/avx.h :: */ - -#if !defined(SIMDE_X86_PF16C_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) -# define SIMDE_X86_PF16C_ENABLE_NATIVE_ALIASES -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtps_ph(simde__m128 a, const int imm8) { - simde__m128_private a_ = simde__m128_to_private(a); - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - - HEDLEY_STATIC_CAST(void, imm8); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - r_.neon_f16 = vcombine_f16(vcvt_f16_f32(a_.neon_f32), vdup_n_f16(SIMDE_FLOAT16_C(0.0))); - #elif defined(SIMDE_FLOAT16_VECTOR) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.f16[i] = simde_float16_from_float32(a_.f32[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.u16[i] = simde_float16_as_uint16(simde_float16_from_float32(a_.f32[i])); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_F16C_NATIVE) - #define simde_mm_cvtps_ph(a, imm8) _mm_cvtps_ph(a, imm8) -#endif -#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_ph(a, sae) simde_mm_cvtps_ph(a, sae) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtph_ps(simde__m128i a) { - #if defined(SIMDE_X86_F16C_NATIVE) - return _mm_cvtph_ps(a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - r_.neon_f32 = vcvt_f32_f16(vget_low_f16(a_.neon_f16)); - #elif defined(SIMDE_FLOAT16_VECTOR) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.f32[i] = simde_float16_to_float32(a_.f16[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.f32[i] = simde_float16_to_float32(simde_uint16_as_float16(a_.u16[i])); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES) - #define _mm_cvtph_ps(a) simde_mm_cvtph_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_cvtps_ph(simde__m256 a, const int imm8) { - simde__m256_private a_ = simde__m256_to_private(a); - simde__m128i_private r_; - - HEDLEY_STATIC_CAST(void, imm8); - - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.f16[i] = simde_float16_from_float32(a_.f32[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.u16[i] = simde_float16_as_uint16(simde_float16_from_float32(a_.f32[i])); - } - #endif - - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_F16C_NATIVE) - #define simde_mm256_cvtps_ph(a, imm8) _mm256_cvtps_ph(a, imm8) -#endif -#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES) - #define _mm256_cvtps_ph(a, imm8) simde_mm256_cvtps_ph(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_cvtph_ps(simde__m128i a) { - #if defined(SIMDE_X86_F16C_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtph_ps(a); - #elif 
defined(SIMDE_X86_F16C_NATIVE) - return _mm256_setr_m128( - _mm_cvtph_ps(a), - _mm_cvtph_ps(_mm_castps_si128(_mm_permute_ps(_mm_castsi128_ps(a), 0xee))) - ); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__m256_private r_; - - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_float16_to_float32(a_.f16[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_float16_to_float32(simde_uint16_as_float16(a_.u16[i])); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES) - #define _mm256_cvtph_ps(a) simde_mm256_cvtph_ps(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_F16C_H) */ -/* :: End simde/x86/f16c.h :: */ diff --git a/src/simde/x86/fma.h b/src/simde/x86/fma.h deleted file mode 100644 index 0952a7feb..000000000 --- a/src/simde/x86/fma.h +++ /dev/null @@ -1,35155 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/fma.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2019 Evan Nemerson - */ - -#if !defined(SIMDE_X86_FMA_H) -#define SIMDE_X86_FMA_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
[... repeated MIT/CC0 license blocks and opening boilerplate of the headers re-vendored inside the deleted src/simde/x86/fma.h (simde/x86/avx.h, sse.h, mmx.h, simde-common.h, and hedley.h with its compiler-version-detection macros) elided ...]
-#endif -#if \ - defined(HEDLEY_GNUC_VERSION) && \ - !defined(__clang__) && \ - !defined(HEDLEY_INTEL_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_ARM_VERSION) && \ - !defined(HEDLEY_CRAY_VERSION) && \ - !defined(HEDLEY_TI_VERSION) && \ - !defined(HEDLEY_TI_ARMCL_VERSION) && \ - !defined(HEDLEY_TI_CL430_VERSION) && \ - !defined(HEDLEY_TI_CL2000_VERSION) && \ - !defined(HEDLEY_TI_CL6X_VERSION) && \ - !defined(HEDLEY_TI_CL7X_VERSION) && \ - !defined(HEDLEY_TI_CLPRU_VERSION) && \ - !defined(__COMPCERT__) && \ - !defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION -#endif - -#if defined(HEDLEY_GCC_VERSION_CHECK) -# undef HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(HEDLEY_GCC_VERSION) -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_HAS_ATTRIBUTE) -# undef HEDLEY_HAS_ATTRIBUTE -#endif -#if \ - defined(__has_attribute) && \ - ( \ - (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ - ) -# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if \ - defined(__has_cpp_attribute) && \ - defined(__cplusplus) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) -# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#elif \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_BUILTIN) -# undef HEDLEY_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) -#else -# define HEDLEY_HAS_BUILTIN(builtin) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_BUILTIN) -# undef HEDLEY_GNUC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_BUILTIN) -# undef HEDLEY_GCC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_FEATURE) -# undef HEDLEY_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) -#else -# define HEDLEY_HAS_FEATURE(feature) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_FEATURE) -# undef HEDLEY_GNUC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_FEATURE) -# undef HEDLEY_GCC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_EXTENSION) -# undef HEDLEY_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) -#else -# define HEDLEY_HAS_EXTENSION(extension) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_EXTENSION) -# undef HEDLEY_GNUC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_EXTENSION) -# undef HEDLEY_GCC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) -#else -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_WARNING) -# undef HEDLEY_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) -#else -# define HEDLEY_HAS_WARNING(warning) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_WARNING) -# undef HEDLEY_GNUC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_WARNING) -# undef HEDLEY_GCC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - defined(__clang__) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ - HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ - (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) -# define HEDLEY_PRAGMA(value) _Pragma(#value) -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_PRAGMA(value) __pragma(value) -#else -# define HEDLEY_PRAGMA(value) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_PUSH) -# undef HEDLEY_DIAGNOSTIC_PUSH -#endif -#if defined(HEDLEY_DIAGNOSTIC_POP) -# undef HEDLEY_DIAGNOSTIC_POP -#endif -#if defined(__clang__) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) -# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) -#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#else -# 
define HEDLEY_DIAGNOSTIC_PUSH -# define HEDLEY_DIAGNOSTIC_POP -#endif - -/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wc++98-compat") -# if HEDLEY_HAS_WARNING("-Wc++17-extensions") -# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# endif -#endif -#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x -#endif - -#if defined(HEDLEY_CONST_CAST) -# undef HEDLEY_CONST_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) -#elif \ - HEDLEY_HAS_WARNING("-Wcast-qual") || \ - HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_REINTERPRET_CAST) -# undef HEDLEY_REINTERPRET_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) -#else -# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_STATIC_CAST) -# undef HEDLEY_STATIC_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) -#else -# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_CPP_CAST) -# undef HEDLEY_CPP_CAST -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wold-style-cast") -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ - ((T) (expr)) \ - HEDLEY_DIAGNOSTIC_POP -# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("diag_suppress=Pe137") \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) -# endif -#else -# define HEDLEY_CPP_CAST(T, expr) (expr) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) -# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif -#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-attributes") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") -#elif \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif -#if HEDLEY_HAS_WARNING("-Wcast-qual") -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif -#if HEDLEY_HAS_WARNING("-Wunused-function") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif - -#if defined(HEDLEY_DEPRECATED) -# undef HEDLEY_DEPRECATED -#endif -#if defined(HEDLEY_DEPRECATED_FOR) -# undef HEDLEY_DEPRECATED_FOR -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) -# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) -#elif \ - (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) -#elif defined(__cplusplus) && (__cplusplus >= 201402L) -# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) -# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(deprecated) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") -# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") -#else -# define HEDLEY_DEPRECATED(since) -# define HEDLEY_DEPRECATED_FOR(since, replacement) -#endif - -#if defined(HEDLEY_UNAVAILABLE) -# undef HEDLEY_UNAVAILABLE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warning) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) -#else -# define HEDLEY_UNAVAILABLE(available_since) -#endif - -#if defined(HEDLEY_WARN_UNUSED_RESULT) -# undef HEDLEY_WARN_UNUSED_RESULT -#endif -#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) -# undef HEDLEY_WARN_UNUSED_RESULT_MSG -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) -#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#elif defined(_Check_return_) /* SAL */ -# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ -#else -# define HEDLEY_WARN_UNUSED_RESULT -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) -#endif - -#if defined(HEDLEY_SENTINEL) -# undef HEDLEY_SENTINEL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(sentinel) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) -#else -# define HEDLEY_SENTINEL(position) -#endif - -#if defined(HEDLEY_NO_RETURN) -# undef HEDLEY_NO_RETURN -#endif -#if HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NO_RETURN __noreturn -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -# define HEDLEY_NO_RETURN _Noreturn -#elif defined(__cplusplus) && (__cplusplus >= 201103L) -# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(noreturn) || \ - HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_NO_RETURN _Pragma("does_not_return") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NO_RETURN __attribute((noreturn)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#else -# define HEDLEY_NO_RETURN -#endif - -#if defined(HEDLEY_NO_ESCAPE) -# undef HEDLEY_NO_ESCAPE -#endif -#if HEDLEY_HAS_ATTRIBUTE(noescape) -# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) -#else -# define HEDLEY_NO_ESCAPE -#endif - -#if defined(HEDLEY_UNREACHABLE) -# undef HEDLEY_UNREACHABLE -#endif -#if defined(HEDLEY_UNREACHABLE_RETURN) -# undef HEDLEY_UNREACHABLE_RETURN -#endif -#if defined(HEDLEY_ASSUME) -# undef HEDLEY_ASSUME -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ASSUME(expr) __assume(expr) -#elif HEDLEY_HAS_BUILTIN(__builtin_assume) -# define HEDLEY_ASSUME(expr) __builtin_assume(expr) -#elif \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# if defined(__cplusplus) -# define HEDLEY_ASSUME(expr) std::_nassert(expr) -# else -# define HEDLEY_ASSUME(expr) _nassert(expr) -# endif -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNREACHABLE() __builtin_unreachable() -#elif defined(HEDLEY_ASSUME) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif -#if !defined(HEDLEY_ASSUME) -# if defined(HEDLEY_UNREACHABLE) -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) -# else -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) -# endif -#endif -#if defined(HEDLEY_UNREACHABLE) -# if \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) -# else -# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() -# endif -#else -# define HEDLEY_UNREACHABLE_RETURN(value) return (value) -#endif -#if !defined(HEDLEY_UNREACHABLE) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wpedantic") -# pragma clang diagnostic ignored "-Wpedantic" -#endif -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif -#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) -# if defined(__clang__) -# pragma clang diagnostic ignored "-Wvariadic-macros" -# elif defined(HEDLEY_GCC_VERSION) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif -#endif -#if defined(HEDLEY_NON_NULL) -# undef HEDLEY_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) -#else -# define HEDLEY_NON_NULL(...) 
-#endif -HEDLEY_DIAGNOSTIC_POP - -#if defined(HEDLEY_PRINTF_FORMAT) -# undef HEDLEY_PRINTF_FORMAT -#endif -#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) -#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) -#elif \ - HEDLEY_HAS_ATTRIBUTE(format) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) -#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) -#else -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) -#endif - -#if defined(HEDLEY_CONSTEXPR) -# undef HEDLEY_CONSTEXPR -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) -# endif -#endif -#if !defined(HEDLEY_CONSTEXPR) -# define HEDLEY_CONSTEXPR -#endif - -#if defined(HEDLEY_PREDICT) -# undef HEDLEY_PREDICT -#endif -#if defined(HEDLEY_LIKELY) -# undef HEDLEY_LIKELY -#endif -#if defined(HEDLEY_UNLIKELY) -# undef HEDLEY_UNLIKELY -#endif -#if defined(HEDLEY_UNPREDICTABLE) -# undef HEDLEY_UNPREDICTABLE -#endif -#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) -# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) -# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) -# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) -# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) -# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) -#elif \ - (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, expected, probability) \ - (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) -# define HEDLEY_PREDICT_TRUE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ - })) -# define HEDLEY_PREDICT_FALSE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ - })) -# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) -# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) -#else -# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) -# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) -# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) -# define HEDLEY_LIKELY(expr) (!!(expr)) -# define HEDLEY_UNLIKELY(expr) (!!(expr)) -#endif -#if !defined(HEDLEY_UNPREDICTABLE) -# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) -#endif - -#if defined(HEDLEY_MALLOC) -# undef HEDLEY_MALLOC -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(malloc) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_MALLOC __attribute__((__malloc__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_MALLOC _Pragma("returns_new_memory") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_MALLOC __declspec(restrict) -#else -# define HEDLEY_MALLOC -#endif - -#if defined(HEDLEY_PURE) -# undef HEDLEY_PURE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(pure) || \ - HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PURE __attribute__((__pure__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_PURE _Pragma("does_not_write_global_data") -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ - ) -# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") -#else -# define HEDLEY_PURE -#endif - -#if defined(HEDLEY_CONST) -# undef HEDLEY_CONST -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(const) || \ - HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_CONST __attribute__((__const__)) -#elif \ - HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_CONST _Pragma("no_side_effect") -#else -# define HEDLEY_CONST HEDLEY_PURE -#endif - -#if defined(HEDLEY_RESTRICT) -# undef HEDLEY_RESTRICT -#endif -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) -# define HEDLEY_RESTRICT restrict -#elif \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - defined(__clang__) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RESTRICT __restrict -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) -# define HEDLEY_RESTRICT _Restrict -#else -# define HEDLEY_RESTRICT -#endif - -#if defined(HEDLEY_INLINE) -# undef HEDLEY_INLINE -#endif -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - (defined(__cplusplus) && (__cplusplus >= 199711L)) -# define HEDLEY_INLINE inline -#elif \ - defined(HEDLEY_GCC_VERSION) || \ - HEDLEY_ARM_VERSION_CHECK(6,2,0) -# define HEDLEY_INLINE __inline__ -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_INLINE __inline -#else -# define HEDLEY_INLINE -#endif - -#if defined(HEDLEY_ALWAYS_INLINE) -# undef HEDLEY_ALWAYS_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(always_inline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ALWAYS_INLINE __forceinline -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ - ) -# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") -#else -# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE -#endif - -#if defined(HEDLEY_NEVER_INLINE) -# undef HEDLEY_NEVER_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(noinline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) -# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NEVER_INLINE _Pragma("inline=never") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NEVER_INLINE __attribute((noinline)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#else -# define HEDLEY_NEVER_INLINE -#endif - -#if defined(HEDLEY_PRIVATE) -# undef HEDLEY_PRIVATE -#endif -#if defined(HEDLEY_PUBLIC) -# undef HEDLEY_PUBLIC -#endif -#if defined(HEDLEY_IMPORT) -# undef HEDLEY_IMPORT -#endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC __declspec(dllexport) -# define HEDLEY_IMPORT __declspec(dllimport) -#else -# if \ - HEDLEY_HAS_ATTRIBUTE(visibility) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - ( \ - defined(__TI_EABI__) && \ - ( \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ - ) \ - ) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) -# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) -# else -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC -# endif -# define HEDLEY_IMPORT extern -#endif - -#if defined(HEDLEY_NO_THROW) -# undef HEDLEY_NO_THROW -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nothrow) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_THROW __attribute__((__nothrow__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NO_THROW __declspec(nothrow) -#else -# define HEDLEY_NO_THROW -#endif - -#if defined(HEDLEY_FALL_THROUGH) -# undef HEDLEY_FALL_THROUGH -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_FALL_THROUGH -#elif \ - HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) -#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) -#elif defined(__fallthrough) /* SAL */ -# define HEDLEY_FALL_THROUGH __fallthrough -#else -# define HEDLEY_FALL_THROUGH -#endif - -#if defined(HEDLEY_RETURNS_NON_NULL) -# undef HEDLEY_RETURNS_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) -#elif defined(_Ret_notnull_) /* SAL */ -# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ -#else -# define HEDLEY_RETURNS_NON_NULL -#endif - -#if defined(HEDLEY_ARRAY_PARAM) -# undef HEDLEY_ARRAY_PARAM -#endif -#if \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - !defined(__STDC_NO_VLA__) && \ - !defined(__cplusplus) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_ARRAY_PARAM(name) (name) -#else -# define 
HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(HEDLEY_IS_CONSTANT) -# undef HEDLEY_IS_CONSTANT -#endif -#if defined(HEDLEY_REQUIRE_CONSTEXPR) -# undef HEDLEY_REQUIRE_CONSTEXPR -#endif -/* HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_IS_CONSTEXPR_) -# undef HEDLEY_IS_CONSTEXPR_ -#endif -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -# if \ - HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) -# endif -# elif \ - ( \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(HEDLEY_SUNPRO_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION)) || \ - (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) -# endif -# elif \ - defined(HEDLEY_GCC_VERSION) || \ - defined(HEDLEY_INTEL_VERSION) || \ - defined(HEDLEY_TINYC_VERSION) || \ - defined(HEDLEY_TI_ARMCL_VERSION) || \ - HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ - defined(HEDLEY_TI_CL2000_VERSION) || \ - defined(HEDLEY_TI_CL6X_VERSION) || \ - defined(HEDLEY_TI_CL7X_VERSION) || \ - defined(HEDLEY_TI_CLPRU_VERSION) || \ - defined(__clang__) -# define HEDLEY_IS_CONSTEXPR_(expr) ( \ - sizeof(void) != \ - sizeof(*( \ - 1 ? \ - ((void*) ((expr) * 0L) ) : \ - ((struct { char v[sizeof(void) * 2]; } *) 1) \ - ) \ - ) \ - ) -# endif -#endif -#if defined(HEDLEY_IS_CONSTEXPR_) -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) -#else -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) (0) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(HEDLEY_BEGIN_C_DECLS) -# undef HEDLEY_BEGIN_C_DECLS -#endif -#if defined(HEDLEY_END_C_DECLS) -# undef HEDLEY_END_C_DECLS -#endif -#if defined(HEDLEY_C_DECL) -# undef HEDLEY_C_DECL -#endif -#if defined(__cplusplus) -# define HEDLEY_BEGIN_C_DECLS extern "C" { -# define HEDLEY_END_C_DECLS } -# define HEDLEY_C_DECL extern "C" -#else -# define HEDLEY_BEGIN_C_DECLS -# define HEDLEY_END_C_DECLS -# define HEDLEY_C_DECL -#endif - -#if defined(HEDLEY_STATIC_ASSERT) -# undef HEDLEY_STATIC_ASSERT -#endif -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) -# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#else -# define HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(HEDLEY_NULL) -# undef HEDLEY_NULL -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) -# elif defined(NULL) -# define HEDLEY_NULL NULL -# else -# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) -# endif -#elif defined(NULL) -# define HEDLEY_NULL NULL -#else -# define HEDLEY_NULL ((void*) 0) -#endif - -#if defined(HEDLEY_MESSAGE) -# undef HEDLEY_MESSAGE -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_MESSAGE(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(message msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_WARNING) -# undef HEDLEY_WARNING -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_WARNING(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(clang warning msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_REQUIRE) -# undef HEDLEY_REQUIRE -#endif -#if defined(HEDLEY_REQUIRE_MSG) -# undef HEDLEY_REQUIRE_MSG -#endif -#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) -# if HEDLEY_HAS_WARNING("-Wgcc-compat") -# define HEDLEY_REQUIRE(expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# define HEDLEY_REQUIRE_MSG(expr,msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) -# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) -# endif -#else -# define HEDLEY_REQUIRE(expr) -# define HEDLEY_REQUIRE_MSG(expr,msg) -#endif - -#if defined(HEDLEY_FLAGS) -# undef HEDLEY_FLAGS -#endif -#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) -# define HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else -# define HEDLEY_FLAGS -#endif - -#if defined(HEDLEY_FLAGS_CAST) -# undef HEDLEY_FLAGS_CAST -#endif -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) -# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)") \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(HEDLEY_EMPTY_BASES) -# undef HEDLEY_EMPTY_BASES -#endif -#if \ - (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else -# define HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. */ - -#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) -# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) -#else -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_BUILTIN) -# undef HEDLEY_CLANG_HAS_BUILTIN -#endif -#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) - -#if defined(HEDLEY_CLANG_HAS_FEATURE) -# undef HEDLEY_CLANG_HAS_FEATURE -#endif -#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) - -#if defined(HEDLEY_CLANG_HAS_EXTENSION) -# undef HEDLEY_CLANG_HAS_EXTENSION -#endif -#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) - -#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_WARNING) -# undef HEDLEY_CLANG_HAS_WARNING -#endif -#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ -/* :: End simde/hedley.h :: */ - -#define SIMDE_VERSION_MAJOR 0 -#define SIMDE_VERSION_MINOR 8 -#define SIMDE_VERSION_MICRO 0 -#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) -// Also update meson.build in the root directory of the repository - -#include -#include - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin 
simde/simde-detect-clang.h :: */ -/* Detect Clang Version - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -/* This file was originally part of SIMDe - * (). You're free to do with it as - * you please, but I do have a few small requests: - * - * * If you make improvements, please submit them back to SIMDe - * (at ) so others can - * benefit from them. - * * Please keep a link to SIMDe intact so people know where to submit - * improvements. - * * If you expose it publicly, please change the SIMDE_ prefix to - * something specific to your project. - * - * The version numbers clang exposes (in the ___clang_major__, - * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. - * Vendors such as Apple will define these values to their version - * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but - * __clang_major__ and __clang_minor__ are defined to 4 and 0 - * respectively, instead of 3 and 1. - * - * The solution is *usually* to use clang's feature detection macros - * () - * to determine if the feature you're interested in is available. This - * generally works well, and it should probably be the first thing you - * try. Unfortunately, it's not possible to check for everything. In - * particular, compiler bugs. - * - * This file just uses the feature checking macros to detect features - * added in specific versions of clang to identify which version of - * clang the compiler is based on. - * - * Right now it only goes back to 3.6, but I'm happy to accept patches - * to go back further. And, of course, newer versions are welcome if - * they're not already present, and if you find a way to detect a point - * release that would be great, too! - */ - -#if !defined(SIMDE_DETECT_CLANG_H) -#define SIMDE_DETECT_CLANG_H 1 - -/* Attempt to detect the upstream clang version number. I usually only - * worry about major version numbers (at least for 4.0+), but if you - * need more resolution I'm happy to accept patches that are able to - * detect minor versions as well. That said, you'll probably have a - * hard time with detection since AFAIK most minor releases don't add - * anything we can detect. Updated based on - * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 - * - would welcome patches/updates there as well. 
- */ - -#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -# if __has_attribute(unsafe_buffer_usage) // no new warnings in 17.0 -# define SIMDE_DETECT_CLANG_VERSION 170000 -# elif __has_attribute(nouwtable) // no new warnings in 16.0 -# define SIMDE_DETECT_CLANG_VERSION 160000 -# elif __has_warning("-Warray-parameter") -# define SIMDE_DETECT_CLANG_VERSION 150000 -# elif __has_warning("-Wbitwise-instead-of-logical") -# define SIMDE_DETECT_CLANG_VERSION 140000 -# elif __has_warning("-Waix-compat") -# define SIMDE_DETECT_CLANG_VERSION 130000 -# elif __has_warning("-Wformat-insufficient-args") -# define SIMDE_DETECT_CLANG_VERSION 120000 -# elif __has_warning("-Wimplicit-const-int-float-conversion") -# define SIMDE_DETECT_CLANG_VERSION 110000 -# elif __has_warning("-Wmisleading-indentation") -# define SIMDE_DETECT_CLANG_VERSION 100000 -# elif defined(__FILE_NAME__) -# define SIMDE_DETECT_CLANG_VERSION 90000 -# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) -# define SIMDE_DETECT_CLANG_VERSION 80000 -// For reasons unknown, Xcode 10.3 (Apple LLVM version 10.0.1) is apparently -// based on Clang 7, but does not support the warning we test. -// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and -// https://trac.macports.org/wiki/XcodeVersionInfo. -# elif __has_warning("-Wc++98-compat-extra-semi") || \ - (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) -# define SIMDE_DETECT_CLANG_VERSION 70000 -# elif __has_warning("-Wpragma-pack") -# define SIMDE_DETECT_CLANG_VERSION 60000 -# elif __has_warning("-Wbitfield-enum-conversion") -# define SIMDE_DETECT_CLANG_VERSION 50000 -# elif __has_attribute(diagnose_if) -# define SIMDE_DETECT_CLANG_VERSION 40000 -# elif __has_warning("-Wcomma") -# define SIMDE_DETECT_CLANG_VERSION 39000 -# elif __has_warning("-Wdouble-promotion") -# define SIMDE_DETECT_CLANG_VERSION 38000 -# elif __has_warning("-Wshift-negative-value") -# define SIMDE_DETECT_CLANG_VERSION 37000 -# elif __has_warning("-Wambiguous-ellipsis") -# define SIMDE_DETECT_CLANG_VERSION 36000 -# else -# define SIMDE_DETECT_CLANG_VERSION 1 -# endif -#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ - -/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty - * straightforward; it returns true if the compiler is a derivative - * of clang >= the specified version. - * - * Since this file is often (primarily?) useful for working around bugs - * it is also helpful to have a macro which returns true if only if the - * compiler is a version of clang *older* than the specified version to - * make it a bit easier to ifdef regions to add code for older versions, - * such as pragmas to disable a specific warning. 
*/ - -#if defined(SIMDE_DETECT_CLANG_VERSION) -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) -#else -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) -#endif - -#endif /* !defined(SIMDE_DETECT_CLANG_H) */ -/* :: End simde/simde-detect-clang.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-arch.h :: */ -/* Architecture detection - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - * Different compilers define different preprocessor macros for the - * same architecture. This is an attempt to provide a single - * interface which is usable on any compiler. - * - * In general, a macro named SIMDE_ARCH_* is defined for each - * architecture the CPU supports. When there are multiple possible - * versions, we try to define the macro to the target version. For - * example, if you want to check for i586+, you could do something - * like: - * - * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) - * ... - * #endif - * - * You could also just check that SIMDE_ARCH_X86 >= 5 without checking - * if it's defined first, but some compilers may emit a warning about - * an undefined macro being used (e.g., GCC with -Wundef). - * - * This was originally created for SIMDe - * (hence the prefix), but this - * header has no dependencies and may be used anywhere. It is - * originally based on information from - * , though it - * has been enhanced with additional information. - * - * If you improve this file, or find a bug, please file the issue at - * . If you copy this into - * your project, even if you change the prefix, please keep the links - * to SIMDe intact so others know where to report issues, submit - * enhancements, and find the latest version. 
*/ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -# if defined(__alpha_ev6__) -# define SIMDE_ARCH_ALPHA 6 -# elif defined(__alpha_ev5__) -# define SIMDE_ARCH_ALPHA 5 -# elif defined(__alpha_ev4__) -# define SIMDE_ARCH_ALPHA 4 -# else -# define SIMDE_ARCH_ALPHA 1 -# endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -# define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -# define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -# if !defined(_M_ARM64EC) -# define SIMDE_ARCH_AMD64 1000 -# endif -#endif - -/* ARM - */ -#if defined(__ARM_ARCH) -# if __ARM_ARCH > 100 -# define SIMDE_ARCH_ARM (__ARM_ARCH) -# else -# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) -# endif -#elif defined(_M_ARM) -# if _M_ARM > 100 -# define SIMDE_ARCH_ARM (_M_ARM) -# else -# define SIMDE_ARCH_ARM (_M_ARM * 100) -# endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_ARM 800 -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -# define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) -#else -# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -# define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -# elif defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -# endif -#endif -#if defined(__ARM_FEATURE_SVE) -# define SIMDE_ARCH_ARM_SVE -#endif -#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA -# define SIMDE_ARCH_ARM_FMA -#endif -#if defined(__ARM_FEATURE_CRYPTO) -# define SIMDE_ARCH_ARM_CRYPTO -#endif -#if defined(__ARM_FEATURE_QRDMX) -# define SIMDE_ARCH_ARM_QRDMX -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -# define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -# define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) -# define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -# define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -# define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -# define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -# define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -# define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -# define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -# define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -# define 
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
*/ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. 
*/ -#if \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ -#endif - -/* This warning needs a lot of work. It is triggered if all you do is - * pass the value to memcpy/__builtin_memcpy, or if you initialize a - * member of the union, even if that member takes up the entire union. - * Last tested with clang-10, hopefully things will improve in the - * future; if clang fixes this I'd love to enable it. */ -#if \ - HEDLEY_HAS_WARNING("-Wconditional-uninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ -#endif - -/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which - * will is false. However, SIMDe uses these operations exclusively - * for things like _mm_cmpeq_ps, for which we really do want to check - * for equality (or inequality). - * - * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro - * which just wraps a check in some code do disable this diagnostic I'd - * be happy to accept it. */ -#if \ - HEDLEY_HAS_WARNING("-Wfloat-equal") || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ -#endif - -/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. - * If Hedley can't find an implementation it will preprocess to - * nothing, which means there will be a trailing semi-colon. */ -#if HEDLEY_HAS_WARNING("-Wextra-semi") - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") -#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ -#endif - -/* We do use a few variadic macros, which technically aren't available - * until C99 and C++11, but every compiler I'm aware of has supported - * them for much longer. That said, usage is isolated to the test - * suite and compilers known to support them. */ -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) - #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#endif - -/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro - * before we can access certain SIMD intrinsics, but this diagnostic - * warns about it being a reserved name. It is a reserved name, but - * it's reserved for the compiler and we are using it to convey - * information to the compiler. - * - * This is also used when enabling native aliases since we don't get to - * choose the macro names. 
*/ -#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#endif - -/* Similar to above; types like simde__m128i are reserved due to the - * double underscore, but we didn't choose them, Intel did. */ -#if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ -#endif - -/* clang 3.8 warns about the packed attribute being unnecessary when - * used in the _mm_loadu_* functions. That *may* be true for version - * 3.8, but for later versions it is crucial in order to make unaligned - * access safe. */ -#if HEDLEY_HAS_WARNING("-Wpacked") - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ -#endif - -/* Triggered when assigning a float to a double implicitly. We use - * explicit casts in SIMDe, this is only used in the test suite. */ -#if HEDLEY_HAS_WARNING("-Wdouble-promotion") - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -/* Several compilers treat conformant array parameters as VLAs. We - * test to make sure we're in C mode (C++ doesn't support CAPs), and - * that the version of the standard supports CAPs. We also reject - * some buggy compilers like MSVC (the logic is in Hedley if you want - * to take a look), but with certain warnings enabled some compilers - * still like to emit a diagnostic. */ -#if HEDLEY_HAS_WARNING("-Wvla") - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ -#endif - -/* If you add an unused attribute to a function and don't use it, clang - * may emit this. 
*/ -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpass-failed") - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpadded") - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ -#endif - -#if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ -#endif - -#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ -#endif - -/* clang will emit this warning when we use C99 extensions whan not in - * C99 mode, even though it does support this. In such cases we check - * the compiler and version first, so we know it's not a problem. */ -#if HEDLEY_HAS_WARNING("-Wc99-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -#endif - -/* Similar problm as above; we rely on some basic C99 support, but clang - * has started warning obut this even in C17 mode with -Weverything. */ -#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ -#endif - -/* https://github.com/simd-everywhere/simde/issues/277 */ -#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ -#endif - -/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS - * to silence, but you have to do that before including anything and - * that would require reordering includes. */ -#if defined(_MSC_VER) - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ -#endif - -/* Some compilers, such as clang, may use `long long` for 64-bit - * integers, but `long long` triggers a diagnostic with - * -Wc++98-compat-pedantic which says 'long long' is incompatible with - * C++98. 
*/ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ - _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ -#endif - -/* Some problem as above */ -#if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ -#endif - -/* emscripten emits this whenever stdin/stdout/stderr is used in a - * macro. */ -#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ -#endif - -/* Clang uses C11 generic selections to implement some AltiVec - * functions, which triggers this diagnostic when not compiling - * in C11 mode */ -#if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ -#endif - -/* Clang sometimes triggers this warning in macros in the AltiVec and - * NEON headers, or due to missing functions. */ -#if HEDLEY_HAS_WARNING("-Wvector-conversion") - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") - /* For NEON, the situation with -Wvector-conversion in clang < 10 is - * bad enough that we just disable the warning altogether. On x86, - * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ - #if \ - (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ - SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ -#endif - -/* Prior to 5.0, clang didn't support disabling diagnostics in - * statement exprs. As a result, some macros we use don't - * properly silence warnings. 
*/ -#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ -#endif - -/* SLEEF triggers this a *lot* in their headers */ -#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#endif - -/* GCC emits this under some circumstances when using __int128 */ -#if HEDLEY_GCC_VERSION_CHECK(4,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -#endif - -/* MSVC doesn't like (__assume(0), code) and will warn about code being - * unreachable, but we want it there because not all compilers - * understand the unreachable macro and will complain if it is missing. - * I'm planning on adding a new macro to Hedley to handle this a bit - * more elegantly, but until then... */ -#if defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) -#elif defined(__clang__) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ -#endif - -/* This is a false positive from GCC in a few places. */ -#if HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif - -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#else - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ -#endif - -/* Some native functions on E2K with instruction set < v6 are declared - * as deprecated due to inefficiency. Still they are more efficient - * than SIMDe implementation. So we're using them, and switching off - * these deprecation warnings. 
*/ -#if defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") -#else -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS -#endif - -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ - HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ - SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ - SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ - SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ - SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ - SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ - SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ - -#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ -/* :: End simde/simde-diagnostic.h :: */ - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SVML) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) - #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) - #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BITALG) - #define SIMDE_X86_AVX512BITALG_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI) - #define SIMDE_X86_AVX512VBMI_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI2) - #define SIMDE_X86_AVX512VBMI2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VNNI) - #define SIMDE_X86_AVX512VNNI_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) - #define SIMDE_X86_AVX5124VNNIW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512CD) - #define SIMDE_X86_AVX512CD_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512DQ) - #define SIMDE_X86_AVX512DQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VL) - #define SIMDE_X86_AVX512VL_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BW) - #define SIMDE_X86_AVX512BW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && !defined(SIMDE_X86_AVX512FP16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512FP16) - #define SIMDE_X86_AVX512FP16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BF16) - #define SIMDE_X86_AVX512BF16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512F) - #define SIMDE_X86_AVX512F_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_NATIVE -#endif - -#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_FMA) - #define SIMDE_X86_FMA_NATIVE - #endif -#endif -#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX2) - #define SIMDE_X86_AVX2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX) - #define SIMDE_X86_AVX_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_XOP) - #define SIMDE_X86_XOP_NATIVE - #endif -#endif -#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_2) - #define SIMDE_X86_SSE4_2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_1) - #define SIMDE_X86_SSE4_1_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSSE3) - #define SIMDE_X86_SSSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE3) - #define SIMDE_X86_SSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_AES_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AES) - #define SIMDE_X86_AES_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE2) - #define SIMDE_X86_SSE2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE) - #define SIMDE_X86_SSE_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_MMX) - #define SIMDE_X86_MMX_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_GFNI) - #define SIMDE_X86_GFNI_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_PCLMUL) - #define SIMDE_X86_PCLMUL_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) - #define SIMDE_X86_VPCLMULQDQ_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_F16C) - #define SIMDE_X86_F16C_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86) && 
(defined(__INTEL_COMPILER) || (HEDLEY_MSVC_VERSION_CHECK(14, 20, 0) && !defined(__clang__))) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(push) - #pragma warning(disable:4799) -#endif - -#if \ - defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) || defined(SIMDE_X86_SVML_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_1_NATIVE) - #include -#elif defined(SIMDE_X86_SSSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE_NATIVE) - #include -#elif defined(SIMDE_X86_MMX_NATIVE) - #include -#endif - -#if defined(SIMDE_X86_XOP_NATIVE) - #if defined(_MSC_VER) - #include - #else - #include - #endif -#endif - -#if defined(SIMDE_X86_AES_NATIVE) - #include -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(pop) -#endif - -#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) - #define SIMDE_ARM_NEON_A64V8_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) - #define SIMDE_ARM_NEON_A32V8_NATIVE - #endif -#endif -#if defined(__ARM_ACLE) - #include -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) - #define SIMDE_ARM_NEON_A32V7_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include - #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - #include - #endif -#endif - -#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_SVE) - #define SIMDE_ARM_SVE_NATIVE - #include - #endif -#endif - -#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_SIMD128) - #define SIMDE_WASM_SIMD128_NATIVE - #endif -#endif - -#if !defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) && !defined(SIMDE_WASM_RELAXED_SIMD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_RELAXED_SIMD) - #define SIMDE_WASM_RELAXED_SIMD_NATIVE - #endif -#endif -#if defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) - #include -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) - #define SIMDE_POWER_ALTIVEC_P9_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) - #define 
SIMDE_POWER_ALTIVEC_P7_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) - #define SIMDE_POWER_ALTIVEC_P7_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_15_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_14_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_13_NATIVE - #endif -#endif - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - /* AltiVec conflicts with lots of stuff. The bool keyword conflicts - * with the bool keyword in C++ and the bool macro in C99+ (defined - * in stdbool.h). The vector keyword conflicts with std::vector in - * C++ if you are `using std;`. - * - * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` - * instead, but altivec.h will unconditionally define - * `vector`/`bool`/`pixel` so we need to work around that. - * - * Unfortunately this means that if your code uses AltiVec directly - * it may break. If this is the case you'll want to define - * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even - * better, port your code to use the double-underscore versions. */ - #if defined(bool) - #undef bool - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #include - - #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #if defined(vector) - #undef vector - #endif - #if defined(pixel) - #undef pixel - #endif - #if defined(bool) - #undef bool - #endif - #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #include - #endif - - /* Use these intsead of vector/pixel/bool in SIMDe. 
*/ - #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T - #define SIMDE_POWER_ALTIVEC_PIXEL __pixel - #define SIMDE_POWER_ALTIVEC_BOOL __bool - - /* Re-define bool if we're using stdbool.h */ - #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #define bool _Bool - #endif -#endif - -#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) - #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_MSA) - #define SIMDE_MIPS_MSA_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_MSA_NATIVE) - #include -#endif - -/* This is used to determine whether or not to fall back on a vector - * function in an earlier ISA extensions, as well as whether - * we expected any attempts at vectorization to be fruitful or if we - * expect to always be running serial code. - * - * Note that, for some architectures (okay, *one* architecture) there - * can be a split where some types are supported for one vector length - * but others only for a shorter length. Therefore, it is possible to - * provide separate values for float/int/double types. */ - -#if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (512) - #elif defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (256) - #elif defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || \ - defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ - defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (128) - #elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) - #endif - - #if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE - #else - #define SIMDE_NATURAL_VECTOR_SIZE (0) - #endif - #endif - - #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif -#endif - -#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) 
((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) - -/* Native aliases */ -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_FMA_NATIVE) - #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VL_NATIVE) - #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) - #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) - #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BW_NATIVE) - #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) - #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) - #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BF16_NATIVE) - #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) - #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) - #define SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512DQ_NATIVE) - #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512CD_NATIVE) - #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512FP16_NATIVE) - #define SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_GFNI_NATIVE) - #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_PCLMUL_NATIVE) - #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) - #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_F16C_NATIVE) - #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AES_NATIVE) - #define 
SIMDE_X86_AES_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SVML_NATIVE) - #define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_SVE_NATIVE) - #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_WASM_SIMD128_NATIVE) - #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES - #endif -#endif - -/* Are floating point values stored using IEEE 754? Knowing - * this at during preprocessing is a bit tricky, mostly because what - * we're curious about is how values are stored and not whether the - * implementation is fully conformant in terms of rounding, NaN - * handling, etc. - * - * For example, if you use -ffast-math or -Ofast on - * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 - * support is not advertised (by defining __STDC_IEC_559__). - * - * However, what we care about is whether it is safe to assume that - * floating point values are stored in IEEE 754 format, in which case - * we can provide faster implementations of some functions. - * - * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- - * so we just assume IEEE 754 for now. There is a test which verifies - * this, if that test fails sowewhere please let us know and we'll add - * an exception for that platform. Meanwhile, you can define - * SIMDE_NO_IEEE754_STORAGE. */ -#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) - #define SIMDE_IEEE754_STORAGE -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_FP16) - #define SIMDE_ARM_NEON_FP16 -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_BF16) - #define SIMDE_ARM_NEON_BF16 -#endif - -#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LASX) - #define SIMDE_LOONGARCH_LASX_NATIVE - #endif -#endif - -#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LSX) - #define SIMDE_LOONGARCH_LSX_NATIVE - #endif -#endif - -#if defined(SIMDE_LOONGARCH_LASX_NATIVE) - #include -#endif -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - #include -#endif - -#endif /* !defined(SIMDE_FEATURES_H) */ -/* :: End simde/simde-features.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-math.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -/* Attempt to find math functions. Functions may be in , - * , compiler built-ins/intrinsics, or platform/architecture - * specific headers. In some cases, especially those not built in to - * libm, we may need to define our own implementations. */ - -#if !defined(SIMDE_MATH_H) -#define SIMDE_MATH_H 1 - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#include -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -/* SLEEF support - * https://sleef.org/ - * - * If you include prior to including SIMDe, SIMDe will use - * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to - * including SIMDe to force the issue. - * - * Note that SLEEF does requires linking to libsleef. - * - * By default, SIMDe will use the 1 ULP functions, but if you use - * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is - * only the case for the simde_math_* functions; for code in other - * SIMDe headers which calls SLEEF directly we may use functions with - * greater error if the API we're implementing is less precise (for - * example, SVML guarantees 4 ULP, so we will generally use the 3.5 - * ULP functions from SLEEF). */ -#if !defined(SIMDE_MATH_SLEEF_DISABLE) - #if defined(__SLEEF_H__) - #define SIMDE_MATH_SLEEF_ENABLE - #endif -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ - #include - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) - #if defined(SLEEF_VERSION_MAJOR) - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #endif -#else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(__has_builtin) - #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) - #define SIMDE_MATH_BUILTIN_LIBM(func) (1) -#else - #define SIMDE_MATH_BUILTIN_LIBM(func) (0) -#endif - -#if defined(HUGE_VAL) - /* Looks like or has already been included. */ - - /* The math.h from libc++ (yes, the C header from the C++ standard - * library) will define an isnan function, but not an isnan macro - * like the C standard requires. So we detect the header guards - * macro libc++ uses. 
*/ - #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) - #define SIMDE_MATH_HAVE_MATH_H - #elif defined(__cplusplus) - #define SIMDE_MATH_HAVE_CMATH - #endif -#elif defined(__has_include) - #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() - #define SIMDE_MATH_HAVE_CMATH - #include - #elif __has_include() - #define SIMDE_MATH_HAVE_MATH_H - #include - #elif !defined(SIMDE_MATH_NO_LIBM) - #define SIMDE_MATH_NO_LIBM - #endif -#elif !defined(SIMDE_MATH_NO_LIBM) - #if defined(__cplusplus) && (__cplusplus >= 201103L) - #define SIMDE_MATH_HAVE_CMATH - HEDLEY_DIAGNOSTIC_PUSH - #if defined(HEDLEY_MSVC_VERSION) - /* VS 14 emits this diagnostic about noexcept being used on a - * function, which we can't do anything about. */ - #pragma warning(disable:4996) - #endif - #include - HEDLEY_DIAGNOSTIC_POP - #else - #define SIMDE_MATH_HAVE_MATH_H - #include - #endif -#endif - -#if !defined(SIMDE_MATH_INFINITY) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_INFINITY (__builtin_inf()) - #elif defined(INFINITY) - #define SIMDE_MATH_INFINITY INFINITY - #endif -#endif - -#if !defined(SIMDE_INFINITYF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inff) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_INFINITYF (__builtin_inff()) - #elif defined(INFINITYF) - #define SIMDE_MATH_INFINITYF INFINITYF - #elif defined(SIMDE_MATH_INFINITY) - #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) - #endif -#endif - -#if !defined(SIMDE_MATH_NAN) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nan) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_NAN (__builtin_nan("")) - #elif defined(NAN) - #define SIMDE_MATH_NAN NAN - #endif -#endif - -#if !defined(SIMDE_NANF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_NANF (__builtin_nanf("")) - #elif defined(NANF) - #define SIMDE_MATH_NANF NANF - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) - #endif -#endif - -#if !defined(SIMDE_MATH_PI) - #if defined(M_PI) - #define SIMDE_MATH_PI M_PI - #else - #define SIMDE_MATH_PI 3.14159265358979323846 - #endif -#endif - -#if !defined(SIMDE_MATH_PIF) - #if defined(M_PI) - #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) - #else - #define SIMDE_MATH_PIF 3.14159265358979323846f - #endif -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180) - #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180F) - #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f -#endif - -#if !defined(SIMDE_MATH_180_OVER_PI) - #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 -#endif - -#if !defined(SIMDE_MATH_180_OVER_PIF) - #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f -#endif - -#if 
!defined(SIMDE_MATH_FLT_MIN) - #if defined(__FLT_MIN__) - #define SIMDE_MATH_FLT_MIN __FLT_MIN__ - #else - #if !defined(FLT_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MIN FLT_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_FLT_MAX) - #if defined(__FLT_MAX__) - #define SIMDE_MATH_FLT_MAX __FLT_MAX__ - #else - #if !defined(FLT_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MAX FLT_MAX - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MIN) - #if defined(__DBL_MIN__) - #define SIMDE_MATH_DBL_MIN __DBL_MIN__ - #else - #if !defined(DBL_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MIN DBL_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MAX) - #if defined(__DBL_MAX__) - #define SIMDE_MATH_DBL_MAX __DBL_MAX__ - #else - #if !defined(DBL_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MAX DBL_MAX - #endif -#endif - -/*** Classification macros from C99 ***/ - -#if !defined(simde_math_isinf) - #if SIMDE_MATH_BUILTIN_LIBM(isinf) - #define simde_math_isinf(v) __builtin_isinf(v) - #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isinf(v) isinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinf(v) std::isinf(v) - #endif -#endif - -#if !defined(simde_math_isinff) - #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define simde_math_isinff(v) __builtin_isinff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinff(v) std::isinf(v) - #elif defined(simde_math_isinf) - #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnan) - #if SIMDE_MATH_BUILTIN_LIBM(isnan) - #define simde_math_isnan(v) __builtin_isnan(v) - #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnan(v) isnan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnan(v) std::isnan(v) - #endif -#endif - -#if !defined(simde_math_isnanf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ - #define simde_math_isnanf(v) __builtin_isnanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnanf(v) std::isnan(v) - #elif defined(simde_math_isnan) - #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnormal) - #if SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormal(v) __builtin_isnormal(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormal(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormal(v) std::isnormal(v) - #endif -#endif - -#if !defined(simde_math_isnormalf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) - #define simde_math_isnormalf(v) __builtin_isnormalf(v) - #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormalf(v) __builtin_isnormal(v) - #elif defined(isnormalf) - #define simde_math_isnormalf(v) isnormalf(v) - #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormalf(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormalf(v) std::isnormal(v) - #elif defined(simde_math_isnormal) - #define simde_math_isnormalf(v) 
simde_math_isnormal(v) - #endif -#endif - -#if !defined(simde_math_issubnormalf) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) - #endif -#endif - -#if !defined(simde_math_issubnormal) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormal(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) - #endif -#endif - -#if defined(FP_NAN) - #define SIMDE_MATH_FP_NAN FP_NAN -#else - #define SIMDE_MATH_FP_NAN 0 -#endif -#if defined(FP_INFINITE) - #define SIMDE_MATH_FP_INFINITE FP_INFINITE -#else - #define SIMDE_MATH_FP_INFINITE 1 -#endif -#if defined(FP_ZERO) - #define SIMDE_MATH_FP_ZERO FP_ZERO -#else - #define SIMDE_MATH_FP_ZERO 2 -#endif -#if defined(FP_SUBNORMAL) - #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL -#else - #define SIMDE_MATH_FP_SUBNORMAL 3 -#endif -#if defined(FP_NORMAL) - #define SIMDE_MATH_FP_NORMAL FP_NORMAL -#else - #define SIMDE_MATH_FP_NORMAL 4 -#endif - -static HEDLEY_INLINE -int -simde_math_fpclassifyf(float v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0f) ? SIMDE_MATH_FP_ZERO : - simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -static HEDLEY_INLINE -int -simde_math_fpclassify(double v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0) ? SIMDE_MATH_FP_ZERO : - simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -#define SIMDE_MATH_FP_QNAN 0x01 -#define SIMDE_MATH_FP_PZERO 0x02 -#define SIMDE_MATH_FP_NZERO 0x04 -#define SIMDE_MATH_FP_PINF 0x08 -#define SIMDE_MATH_FP_NINF 0x10 -#define SIMDE_MATH_FP_DENORMAL 0x20 -#define SIMDE_MATH_FP_NEGATIVE 0x40 -#define SIMDE_MATH_FP_SNAN 0x80 - -static HEDLEY_INLINE -uint8_t -simde_math_fpclassf(float v, const int imm8) { - union { - float f; - uint32_t u; - } fu; - fu.f = v; - uint32_t bits = fu.u; - uint8_t NegNum = (bits >> 31) & 1; - uint32_t const ExpMask = 0x3F800000; // [30:23] - uint32_t const MantMask = 0x007FFFFF; // [22:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 22) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -static HEDLEY_INLINE -uint8_t -simde_math_fpclass(double v, const int imm8) { - union { - double d; - uint64_t u; - } du; - du.d = v; - uint64_t bits = du.u; - uint8_t NegNum = (bits >> 63) & 1; - uint64_t const ExpMask = 0x3FF0000000000000; // [62:52] - uint64_t const MantMask = 0x000FFFFFFFFFFFFF; // [51:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 51) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -/*** Manipulation functions ***/ - -#if !defined(simde_math_nextafter) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafter(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafter(x, y) 
nextafter(x, y) - #endif -#endif - -#if !defined(simde_math_nextafterf) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafterf(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafterf(x, y) nextafterf(x, y) - #endif -#endif - -/*** Functions from C99 ***/ - -#if !defined(simde_math_abs) - #if SIMDE_MATH_BUILTIN_LIBM(abs) - #define simde_math_abs(v) __builtin_abs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_abs(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_abs(v) abs(v) - #endif -#endif - -#if !defined(simde_math_labs) - #if SIMDE_MATH_BUILTIN_LIBM(labs) - #define simde_math_labs(v) __builtin_labs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_labs(v) std::labs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_labs(v) labs(v) - #endif -#endif - -#if !defined(simde_math_llabs) - #if SIMDE_MATH_BUILTIN_LIBM(llabs) - #define simde_math_llabs(v) __builtin_llabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_llabs(v) std::llabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_llabs(v) llabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_acos) - #if SIMDE_MATH_BUILTIN_LIBM(acos) - #define simde_math_acos(v) __builtin_acos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acos(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acos(v) acos(v) - #endif -#endif - -#if !defined(simde_math_acosf) - #if SIMDE_MATH_BUILTIN_LIBM(acosf) - #define simde_math_acosf(v) __builtin_acosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosf(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosf(v) acosf(v) - #endif -#endif - -#if !defined(simde_math_acosh) - #if SIMDE_MATH_BUILTIN_LIBM(acosh) - #define simde_math_acosh(v) __builtin_acosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosh(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosh(v) acosh(v) - #endif -#endif - -#if !defined(simde_math_acoshf) - #if SIMDE_MATH_BUILTIN_LIBM(acoshf) - #define simde_math_acoshf(v) __builtin_acoshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acoshf(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acoshf(v) acoshf(v) - #endif -#endif - -#if !defined(simde_math_asin) - #if SIMDE_MATH_BUILTIN_LIBM(asin) - #define simde_math_asin(v) __builtin_asin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asin(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asin(v) asin(v) - #endif -#endif - -#if !defined(simde_math_asinf) - #if SIMDE_MATH_BUILTIN_LIBM(asinf) - #define simde_math_asinf(v) __builtin_asinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinf(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinf(v) asinf(v) - #endif -#endif - -#if 
!defined(simde_math_asinh) - #if SIMDE_MATH_BUILTIN_LIBM(asinh) - #define simde_math_asinh(v) __builtin_asinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinh(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinh(v) asinh(v) - #endif -#endif - -#if !defined(simde_math_asinhf) - #if SIMDE_MATH_BUILTIN_LIBM(asinhf) - #define simde_math_asinhf(v) __builtin_asinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinhf(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinhf(v) asinhf(v) - #endif -#endif - -#if !defined(simde_math_atan) - #if SIMDE_MATH_BUILTIN_LIBM(atan) - #define simde_math_atan(v) __builtin_atan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan(v) atan(v) - #endif -#endif - -#if !defined(simde_math_atan2) - #if SIMDE_MATH_BUILTIN_LIBM(atan2) - #define simde_math_atan2(y, x) __builtin_atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2(y, x) atan2(y, x) - #endif -#endif - -#if !defined(simde_math_atan2f) - #if SIMDE_MATH_BUILTIN_LIBM(atan2f) - #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2f(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2f(y, x) atan2f(y, x) - #endif -#endif - -#if !defined(simde_math_atanf) - #if SIMDE_MATH_BUILTIN_LIBM(atanf) - #define simde_math_atanf(v) __builtin_atanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanf(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanf(v) atanf(v) - #endif -#endif - -#if !defined(simde_math_atanh) - #if SIMDE_MATH_BUILTIN_LIBM(atanh) - #define simde_math_atanh(v) __builtin_atanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanh(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanh(v) atanh(v) - #endif -#endif - -#if !defined(simde_math_atanhf) - #if SIMDE_MATH_BUILTIN_LIBM(atanhf) - #define simde_math_atanhf(v) __builtin_atanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanhf(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanhf(v) atanhf(v) - #endif -#endif - -#if !defined(simde_math_cbrt) - #if SIMDE_MATH_BUILTIN_LIBM(cbrt) - #define simde_math_cbrt(v) __builtin_cbrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrt(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrt(v) cbrt(v) - #endif -#endif - -#if !defined(simde_math_cbrtf) - #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) - #define simde_math_cbrtf(v) __builtin_cbrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrtf(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrtf(v) cbrtf(v) - #endif -#endif - -#if !defined(simde_math_ceil) - #if SIMDE_MATH_BUILTIN_LIBM(ceil) - #define simde_math_ceil(v) __builtin_ceil(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceil(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_ceil(v) ceil(v) - #endif -#endif - -#if !defined(simde_math_ceilf) - #if SIMDE_MATH_BUILTIN_LIBM(ceilf) - #define simde_math_ceilf(v) __builtin_ceilf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceilf(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) 
- #define simde_math_ceilf(v) ceilf(v) - #endif -#endif - -#if !defined(simde_math_copysign) - #if SIMDE_MATH_BUILTIN_LIBM(copysign) - #define simde_math_copysign(x, y) __builtin_copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysign(x, y) std::copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysign(x, y) copysign(x, y) - #endif -#endif - -#if !defined(simde_math_copysignf) - #if SIMDE_MATH_BUILTIN_LIBM(copysignf) - #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysignf(x, y) std::copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysignf(x, y) copysignf(x, y) - #endif -#endif - -#if !defined(simde_math_signbit) - #if SIMDE_MATH_BUILTIN_LIBM(signbit) - #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - #define simde_math_signbit(x) __builtin_signbit(x) - #else - #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) - #endif - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_signbit(x) std::signbit(x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_signbit(x) signbit(x) - #endif -#endif - -#if !defined(simde_math_cos) - #if SIMDE_MATH_BUILTIN_LIBM(cos) - #define simde_math_cos(v) __builtin_cos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cos(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cos(v) cos(v) - #endif -#endif - -#if !defined(simde_math_cosf) - #if defined(SIMDE_MATH_SLEEF_ENABLE) - #if SIMDE_ACCURACY_PREFERENCE < 1 - #define simde_math_cosf(v) Sleef_cosf_u35(v) - #else - #define simde_math_cosf(v) Sleef_cosf_u10(v) - #endif - #elif SIMDE_MATH_BUILTIN_LIBM(cosf) - #define simde_math_cosf(v) __builtin_cosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosf(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosf(v) cosf(v) - #endif -#endif - -#if !defined(simde_math_cosh) - #if SIMDE_MATH_BUILTIN_LIBM(cosh) - #define simde_math_cosh(v) __builtin_cosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosh(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosh(v) cosh(v) - #endif -#endif - -#if !defined(simde_math_coshf) - #if SIMDE_MATH_BUILTIN_LIBM(coshf) - #define simde_math_coshf(v) __builtin_coshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_coshf(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_coshf(v) coshf(v) - #endif -#endif - -#if !defined(simde_math_erf) - #if SIMDE_MATH_BUILTIN_LIBM(erf) - #define simde_math_erf(v) __builtin_erf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erf(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erf(v) erf(v) - #endif -#endif - -#if !defined(simde_math_erff) - #if SIMDE_MATH_BUILTIN_LIBM(erff) - #define simde_math_erff(v) __builtin_erff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erff(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erff(v) erff(v) - #endif -#endif - -#if !defined(simde_math_erfc) - #if SIMDE_MATH_BUILTIN_LIBM(erfc) - #define simde_math_erfc(v) __builtin_erfc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfc(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfc(v) erfc(v) - #endif -#endif - -#if !defined(simde_math_erfcf) - #if SIMDE_MATH_BUILTIN_LIBM(erfcf) - #define simde_math_erfcf(v) 
__builtin_erfcf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfcf(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfcf(v) erfcf(v) - #endif -#endif - -#if !defined(simde_math_exp) - #if SIMDE_MATH_BUILTIN_LIBM(exp) - #define simde_math_exp(v) __builtin_exp(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp(v) exp(v) - #endif -#endif - -#if !defined(simde_math_expf) - #if SIMDE_MATH_BUILTIN_LIBM(expf) - #define simde_math_expf(v) __builtin_expf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expf(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expf(v) expf(v) - #endif -#endif - -#if !defined(simde_math_expm1) - #if SIMDE_MATH_BUILTIN_LIBM(expm1) - #define simde_math_expm1(v) __builtin_expm1(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1(v) expm1(v) - #endif -#endif - -#if !defined(simde_math_expm1f) - #if SIMDE_MATH_BUILTIN_LIBM(expm1f) - #define simde_math_expm1f(v) __builtin_expm1f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1f(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1f(v) expm1f(v) - #endif -#endif - -#if !defined(simde_math_exp2) - #if SIMDE_MATH_BUILTIN_LIBM(exp2) - #define simde_math_exp2(v) __builtin_exp2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2(v) exp2(v) - #endif -#endif - -#if !defined(simde_math_exp2f) - #if SIMDE_MATH_BUILTIN_LIBM(exp2f) - #define simde_math_exp2f(v) __builtin_exp2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2f(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2f(v) exp2f(v) - #endif -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10(v) __builtin_exp10(v) -#else -# define simde_math_exp10(v) pow(10.0, (v)) -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10f(v) __builtin_exp10f(v) -#else -# define simde_math_exp10f(v) powf(10.0f, (v)) -#endif - -#if !defined(simde_math_fabs) - #if SIMDE_MATH_BUILTIN_LIBM(fabs) - #define simde_math_fabs(v) __builtin_fabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabs(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabs(v) fabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_floor) - #if SIMDE_MATH_BUILTIN_LIBM(floor) - #define simde_math_floor(v) __builtin_floor(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floor(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floor(v) floor(v) - #endif -#endif - -#if !defined(simde_math_floorf) - #if SIMDE_MATH_BUILTIN_LIBM(floorf) - #define simde_math_floorf(v) __builtin_floorf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floorf(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floorf(v) floorf(v) - #endif -#endif - -#if 
[... remainder of the vendored simde-math.h deleted along with the rest of the SIMDe sources: the simde_math_* wrapper macros mapping fma, fmax, hypot, log/log2/log10/log1p/logb, modf, nearbyint, pow, rint, round, roundeven, sin/sinh, sqrt/sqrtl, tan/tanh and trunc (and their float variants) onto __builtin_*, std::*, or <math.h> equivalents; the simde_math_isunordered comparison helpers; the non-libm additions (the A&S 7.1.26 cdfnorm approximation, a rational-polynomial cdfnorminv, erfinv/erfcinv approximations, rad2deg/deg2rad); and the branchless saturated integer add/subtract helpers simde_math_adds_*/simde_math_subs_* for signed and unsigned 8/16/32/64-bit values, with NEON scalar intrinsics (vqaddb_s8 etc.) on AArch64. The file closes with HEDLEY_DIAGNOSTIC_POP and its SIMDE_MATH_H include guard. ...]
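Purely as an aside (not part of the diff, and the helper names below are illustrative), the branchless unsigned saturation idiom those deleted adds_u*/subs_u* fallbacks rely on works like this:

```cpp
#include <cstdint>
#include <cstdio>

// Saturating add for uint8_t: if the sum wraps it becomes smaller than
// either operand, so (r < a) is 1 and -(r < a) is 0xFF, pinning the
// result at UINT8_MAX without a branch.
static std::uint8_t adds_u8(std::uint8_t a, std::uint8_t b) {
    std::uint8_t r = static_cast<std::uint8_t>(a + b);
    r |= static_cast<std::uint8_t>(-(r < a));
    return r;
}

// Saturating subtract: on underflow (r > a) the mask -(r <= a) is 0,
// clamping the result to 0.
static std::uint8_t subs_u8(std::uint8_t a, std::uint8_t b) {
    std::uint8_t r = static_cast<std::uint8_t>(a - b);
    r &= static_cast<std::uint8_t>(-(r <= a));
    return r;
}

int main() {
    std::printf("%u %u\n",
                static_cast<unsigned>(adds_u8(200, 100)),   // 255 (clamped)
                static_cast<unsigned>(adds_u8(20, 30)));    // 50
    std::printf("%u %u\n",
                static_cast<unsigned>(subs_u8(10, 30)),     // 0 (clamped)
                static_cast<unsigned>(subs_u8(30, 10)));    // 20
    return 0;
}
```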
[... the next vendored file, simde-constify.h (MIT, Copyright 2020 Evan Nemerson), is deleted in full. It consisted of the SIMDE_CONSTIFY_{2,4,8,16,32,64}_ and SIMDE_CONSTIFY_*_NO_RESULT_ helper macros, which expand a run-time argument into a switch with one case per possible value so that functions requiring an integer constant expression (ICE) can still be called when the argument is not a literal; when the argument is in fact constant, the compiler collapses the switch to the matching case. ...]
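For context only (this sketch is not part of the diff; SHIFT_IMM and CONSTIFY_4 are made-up names standing in for the deleted macros), the constify pattern looks roughly like this in plain C/C++:

```cpp
#include <cstdio>

// Stand-in for an intrinsic whose second argument must be an integer
// constant expression (ICE), e.g. a SIMD shift-immediate.
#define SHIFT_IMM(x, imm) ((x) << (imm))

// The "constify" pattern: expand a run-time value into a switch whose
// cases each pass a literal, so every branch hands the macro a true
// compile-time constant. (Four cases only, for brevity.)
#define CONSTIFY_4(result, default_case, imm, x)              \
    do {                                                      \
        switch (imm) {                                        \
            case 0: (result) = SHIFT_IMM((x), 0); break;      \
            case 1: (result) = SHIFT_IMM((x), 1); break;      \
            case 2: (result) = SHIFT_IMM((x), 2); break;      \
            case 3: (result) = SHIFT_IMM((x), 3); break;      \
            default: (result) = (default_case); break;        \
        }                                                     \
    } while (0)

int main() {
    unsigned shift = 3;            // not a compile-time constant
    unsigned r = 0;
    CONSTIFY_4(r, 0u, shift, 5u);  // dispatches to the case 3 branch
    std::printf("%u\n", r);        // prints 40 (5 << 3)
    return 0;
}
```

When the argument really is constant, the optimizer folds the switch down to the single matching case, so the generated code matches a direct ICE call.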
[... the vendored simde-align.h (CC0, Evan Nemerson) is likewise deleted in full. It provided the alignment portability layer: SIMDE_ALIGN_OF, the SIMDE_ALIGN_MAXIMUM/SIMDE_ALIGN_CAP platform caps, SIMDE_ALIGN_TO plus the MSVC-safe numeric variants SIMDE_ALIGN_TO_8/16/32/64, SIMDE_ALIGN_ASSUME_TO (backed by __builtin_assume_aligned or C++20 std::assume_aligned, with an optional SIMDE_ALIGN_DEBUG run-time check), the type-based *_LIKE variants, and SIMDE_ALIGN_CAST/SIMDE_ALIGN_ASSUME_CAST for silencing cast-align warnings. ...]
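Again for context only (a hypothetical sketch, not SIMDe's macros), the declaration-site alignment that simde-align.h wrapped maps onto standard C++11 features roughly like this:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

// What an ALIGN_TO(16)-style macro boils down to on a C++11 compiler.
struct alignas(16) i32x4 {
    std::int32_t values[4];
};

int main() {
    i32x4 v = {{1, 2, 3, 4}};
    std::printf("alignof(i32x4) = %zu, addr %% 16 = %zu\n",
                alignof(i32x4),
                static_cast<std::size_t>(
                    reinterpret_cast<std::uintptr_t>(&v) % 16));  // always 0
    return 0;
}
```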
*/ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) - #define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) - #if defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_NANS - #elif defined(__FINITE_MATH_ONLY__) - #if __FINITE_MATH_ONLY__ - #define SIMDE_FAST_NANS - #endif - #endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_CONVERSION_RANGE -#endif - -/* Due to differences across platforms, sometimes it can be much - * faster for us to allow spurious floating point exceptions, - * or to no generate them when we should. 
*/ -#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_EXCEPTIONS -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) - #if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ - (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) - #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") - #else - #define SIMDE_REQUIRE_CONSTANT(arg) - #endif -#else - #define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) - /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which - * starts with a double-underscore. This is a system header so we have no - * control over it, but since it's a macro it will emit a diagnostic which - * prevents compilation with -Werror. */ - #if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ - _Static_assert(expr, message); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) - #endif -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) - #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#endif - -/* Statement exprs */ -#if \ - HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ - HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) -#endif - -/* This is just a convenience macro to make it easy to call a single - * function with a specific diagnostic disabled. 
*/ -#if defined(SIMDE_STATEMENT_EXPR_) - #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ - SIMDE_STATEMENT_EXPR_(({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - diagnostic \ - (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#endif - -#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) - #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") -#endif - -#if \ - (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) -# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -# define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -# if \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SCALAR -# define SIMDE_VECTOR_SUBSCRIPT -# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -# define SIMDE_VECTOR_SUBSCRIPT -# elif \ - HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# elif HEDLEY_HAS_ATTRIBUTE(vector_size) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SUBSCRIPT -# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) -# define SIMDE_VECTOR_SCALAR -# endif -# endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) - HEDLEY_DIAGNOSTIC_PUSH - /* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -# endif -# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif - -# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#else -# define SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_SAFELEN(l) -# define SIMDE_VECTORIZE_REDUCTION(r) -# define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if defined(SIMDE_NO_INLINE) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#elif defined(SIMDE_CONSTRAINED_COMPILATION) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static -#else -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if \ - HEDLEY_HAS_ATTRIBUTE(unused) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ - -#if defined(_MSC_VER) -# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -# define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -# define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -# define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -# define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# elif defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__s390x__) || defined(__zarch__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. 
*/ -# elif defined(_WIN32) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(sun) || defined(__sun) /* Solaris */ -# include -# if defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__APPLE__) -# include -# if defined(__LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -# include -# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -# include -# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# endif -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) - #define simde_bswap64(v) _byteswap_uint64(v) -#else - SIMDE_FUNCTION_ATTRIBUTES - uint64_t - simde_bswap64(uint64_t v) { - return - ((v & (((uint64_t) 0xff) << 56)) >> 56) | - ((v & (((uint64_t) 0xff) << 48)) >> 40) | - ((v & (((uint64_t) 0xff) << 40)) >> 24) | - ((v & (((uint64_t) 0xff) << 32)) >> 8) | - ((v & (((uint64_t) 0xff) << 24)) << 8) | - ((v & (((uint64_t) 0xff) << 16)) << 24) | - ((v & (((uint64_t) 0xff) << 8)) << 40) | - ((v & (((uint64_t) 0xff) )) << 56); - } -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -# error Unknown byte order; please file a bug -#else -# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -# define simde_endian_bswap64_be(value) simde_bswap64(value) -# define simde_endian_bswap64_le(value) (value) -# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -# define simde_endian_bswap64_be(value) (value) -# define simde_endian_bswap64_le(value) simde_bswap64(value) -# endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. 
*/ - -#if !defined(SIMDE_FLOAT32_TYPE) -# define SIMDE_FLOAT32_TYPE float -# define SIMDE_FLOAT32_C(value) value##f -#else -# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -# define SIMDE_FLOAT64_TYPE double -# define SIMDE_FLOAT64_C(value) value -#else -# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if defined(SIMDE_POLY8_TYPE) -# undef SIMDE_POLY8_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY8_TYPE poly8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value)) -#else -# define SIMDE_POLY8_TYPE uint8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value)) -#endif -typedef SIMDE_POLY8_TYPE simde_poly8; - -#if defined(SIMDE_POLY16_TYPE) -# undef SIMDE_POLY16_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY16_TYPE poly16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value)) -#else -# define SIMDE_POLY16_TYPE uint16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value)) -#endif -typedef SIMDE_POLY16_TYPE simde_poly16; - -#if defined(SIMDE_POLY64_TYPE) -# undef SIMDE_POLY64_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_POLY64_TYPE poly64_t -# define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull)) -#else -# define SIMDE_POLY64_TYPE uint64_t -# define SIMDE_POLY64_C(value) value ## ull -#endif -typedef SIMDE_POLY64_TYPE simde_poly64; - -#if defined(SIMDE_POLY128_TYPE) -# undef SIMDE_POLY128_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) -# define SIMDE_POLY128_TYPE poly128_t -# define SIMDE_POLY128_C(value) value -#elif defined(__SIZEOF_INT128__) -# define SIMDE_POLY128_TYPE __int128 -# define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value)) -#else -# define SIMDE_POLY128_TYPE uint64_t -# define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1 -#endif -typedef SIMDE_POLY128_TYPE simde_poly128; - -#if defined(__cplusplus) - typedef bool simde_bool; -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - typedef _Bool simde_bool; -#elif defined(bool) - typedef bool simde_bool; -#else - #include - typedef bool simde_bool; -#endif - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -# define SIMDE_CONVERT_FTOI(T,v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) \ - HEDLEY_DIAGNOSTIC_POP -#else -# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) -#else - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -# define 
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -# define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -# define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -# if \ - defined(HEDLEY_PGI_VERSION) || \ - defined(HEDLEY_MSVC_VERSION) -# define SIMDE_STDC_HOSTED 1 -# else -# define SIMDE_STDC_HOSTED 0 -# endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) - #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) - #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) - #endif -#endif -#if !defined(simde_memset) - #if HEDLEY_HAS_BUILTIN(__builtin_memset) - #define simde_memset(s, c, n) __builtin_memset(s, c, n) - #endif -#endif -#if !defined(simde_memcmp) - #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) - #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) - #endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) - #if !defined(SIMDE_NO_STRING_H) - #if defined(__has_include) - #if !__has_include() - #define SIMDE_NO_STRING_H - #endif - #elif (SIMDE_STDC_HOSTED == 0) - #define SIMDE_NO_STRING_H - #endif - #endif - - #if !defined(SIMDE_NO_STRING_H) - #include - #if !defined(simde_memcpy) - #define simde_memcpy(dest, src, n) memcpy(dest, src, n) - #endif - #if !defined(simde_memset) - #define simde_memset(s, c, n) memset(s, c, n) - #endif - #if !defined(simde_memcmp) - #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) - #endif - #else - /* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. 
*/ - #if !defined(simde_memcpy) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memcpy_(void* dest, const void* src, size_t len) { - char* dest_ = HEDLEY_STATIC_CAST(char*, dest); - char* src_ = HEDLEY_STATIC_CAST(const char*, src); - for (size_t i = 0 ; i < len ; i++) { - dest_[i] = src_[i]; - } - } - #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) - #endif - - #if !defined(simde_memset) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memset_(void* s, int c, size_t len) { - char* s_ = HEDLEY_STATIC_CAST(char*, s); - char c_ = HEDLEY_STATIC_CAST(char, c); - for (size_t i = 0 ; i < len ; i++) { - s_[i] = c_[i]; - } - } - #define simde_memset(s, c, n) simde_memset_(s, c, n) - #endif - - #if !defined(simde_memcmp) - SIMDE_FUCTION_ATTRIBUTES - int - simde_memcmp_(const void *s1, const void *s2, size_t n) { - unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); - unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); - for (size_t i = 0 ; i < len ; i++) { - if (s1_[i] != s2_[i]) { - return (int) (s1_[i] - s2_[i]); - } - } - return 0; - } - #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) - #endif - #endif -#endif - -/*** Functions that quiet a signaling NaN ***/ - -static HEDLEY_INLINE -double -simde_math_quiet(double x) { - uint64_t tmp, mask; - if (!simde_math_isnan(x)) { - return x; - } - simde_memcpy(&tmp, &x, 8); - mask = 0x7ff80000; - mask <<= 32; - tmp |= mask; - simde_memcpy(&x, &tmp, 8); - return x; -} - -static HEDLEY_INLINE -float -simde_math_quietf(float x) { - uint32_t tmp; - if (!simde_math_isnanf(x)) { - return x; - } - simde_memcpy(&tmp, &x, 4); - tmp |= 0x7fc00000lu; - simde_memcpy(&x, &tmp, 4); - return x; -} - -#if defined(FE_ALL_EXCEPT) - #define SIMDE_HAVE_FENV_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_FENV_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_FENV_H -#endif - -#if defined(EXIT_FAILURE) - #define SIMDE_HAVE_STDLIB_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_STDLIB_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_STDLIB_H -#endif - -#if defined(__has_include) -# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -# include -# elif __has_include() -# include -# endif -# if __has_include() -# include -# endif -#elif SIMDE_STDC_HOSTED == 1 -# include -# include -#endif - -#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ - static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ - T_To \ - Name (T_From value) { \ - T_To r; \ - simde_memcpy(&r, &value, sizeof(r)); \ - return r; \ - } - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/check.h :: */ -/* Check (assertions) - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -#if !defined(_WIN32) -# define SIMDE_SIZE_MODIFIER "z" -# define SIMDE_CHAR_MODIFIER "hh" -# define SIMDE_SHORT_MODIFIER "h" -#else -# if defined(_M_X64) || defined(__amd64__) -# define SIMDE_SIZE_MODIFIER "I64" -# else -# define SIMDE_SIZE_MODIFIER "" -# endif -# define SIMDE_CHAR_MODIFIER "" -# define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) -# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ -# define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -# if defined(__has_include) -# if __has_include() -# include -# endif -# elif defined(SIMDE_STDC_HOSTED) -# if SIMDE_STDC_HOSTED == 1 -# include -# endif -# elif defined(__STDC_HOSTED__) -# if __STDC_HOSTETD__ == 1 -# include -# endif -# endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/debug-trap.h :: */ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define simde_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define simde_trap() __debugbreak() -# endif -#endif -#if !defined(simde_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define simde_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define simde_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define simde_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void simde_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define simde_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define simde_trap() raise(SIGTRAP) -# else -# define simde_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -# define simde_dbg_assert(expr) do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -# define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ -/* :: End simde/debug-trap.h :: */ - - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -# if defined(EOF) -# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) -# else -# define simde_errorf(format, ...) (simde_trap()) -# endif - HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -# if defined(SIMDE_CHECK_FAIL_DEFINED) -# define simde_assert(expr) -# else -# if defined(HEDLEY_ASSUME) -# define simde_assert(expr) HEDLEY_ASSUME(expr) -# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) -# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) -# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) -# define simde_assert(expr) __assume(expr) -# else -# define simde_assert(expr) -# endif -# endif -# define simde_assert_true(expr) simde_assert(expr) -# define simde_assert_false(expr) simde_assert(!(expr)) -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) -# define simde_assert_double_equal(a, b, precision) -# define simde_assert_string_equal(a, b) -# define simde_assert_string_not_equal(a, b) -# define simde_assert_memory_equal(size, a, b) -# define simde_assert_memory_not_equal(size, a, b) -#else -# define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ - -(simde_tmp_a_ - simde_tmp_b_) : \ - (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# include -# define simde_assert_string_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ - simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_string_not_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ - simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ - simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) \ - simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) \ - simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) \ - simde_assert_type(float, "f", a, op, b) 
-#define simde_assert_double(a, op, b) \ - simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void*, "p", a, op, b) - -#define simde_assert_int8(a, op, b) \ - simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) \ - simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) \ - simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) \ - simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ -/* :: End simde/check.h :: */ - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether the operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * we use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long lonsg are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long. 
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. 
- * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - #include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; - #endif - - #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; - #endif - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; - #endif - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; - #endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde__m64_from_private(simde__m64_private v) { - simde__m64 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64_private -simde__m64_to_private(simde__m64 v) { - simde__m64_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ - SIMDE_FUNCTION_ATTRIBUTES \ - simde__##simde_type \ - simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ - simde__##simde_type##_private r_; \ - r_.isax##_##fragment = value; \ - return simde__##simde_type##_from_private(r_); \ - } \ - \ - SIMDE_FUNCTION_ATTRIBUTES \ - source_type \ - simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ - simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ - return r_.isax##_##fragment; \ - } - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) -#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) -# define _m_paddb(a, b) simde_m_paddb(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_add_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) -# define _m_paddw(a, b) simde_mm_add_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) -# define _m_paddd(a, b) simde_mm_add_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { - r_.i8[i] = INT8_MAX; - } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { - r_.i8[i] = INT8_MIN; - } else { - r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) -# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, 
b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) -# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_and_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - r_.i64[0] = a_.i64[0] & b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -# define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = 
simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) -# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) -# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) -# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) -# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) -# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) -# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtm64_si64 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtm64_si64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i64[0]; - #endif - #endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -# define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi32_si64 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[2] = { a, 0 }; - r_.neon_i32 = vld1_s32(av); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -# define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi64_m64 (int64_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtsi64_m64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); - #else - r_.i64[0] = a; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi64_si32 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_empty (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); - #else - /* noop */ - #endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_empty() simde_mm_empty() -# define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_or_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - r_.i64[0] = a_.i64[0] | b_.i64[0]; 
- #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -# define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to 
UINT8_MAX */ - const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); - - /* Elements which are within the acceptable range */ - const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); - const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); - - /* Final values as 16-bit integers */ - const int16x8_t values = vorrq_s16(le_max, gt_max); - - r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else if (a_.i16[i] < 0) { - r_.u8[i] = 0; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] > UINT8_MAX) { - r_.u8[i + 4] = UINT8_MAX; - } else if (b_.i16[i] < 0) { - r_.u8[i + 4] = 0; - } else { - r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) -# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i8 = vld1_s8(v); - #else - r_.i8[0] = e0; - r_.i8[1] = e1; - r_.i8[2] = e2; - r_.i8[3] = e3; - r_.i8[4] = e4; - r_.i8[5] = e5; - r_.i8[6] = e6; - r_.i8[7] = e7; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m64_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi8( - HEDLEY_STATIC_CAST(int8_t, e7), - HEDLEY_STATIC_CAST(int8_t, e6), - HEDLEY_STATIC_CAST(int8_t, e5), - HEDLEY_STATIC_CAST(int8_t, e4), - HEDLEY_STATIC_CAST(int8_t, e3), - HEDLEY_STATIC_CAST(int8_t, e2), - HEDLEY_STATIC_CAST(int8_t, e1), - HEDLEY_STATIC_CAST(int8_t, e0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u8 = vld1_u8(v); - #else - r_.u8[0] = e0; - r_.u8[1] = e1; - r_.u8[2] = e2; - r_.u8[3] = e3; - r_.u8[4] = e4; - r_.u8[5] = e5; - r_.u8[6] = e6; - r_.u8[7] = e7; - #endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi16(e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; - r_.neon_i16 = vld1_s16(v); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - #endif - - return 
simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi16( - HEDLEY_STATIC_CAST(int16_t, e3), - HEDLEY_STATIC_CAST(int16_t, e2), - HEDLEY_STATIC_CAST(int16_t, e1), - HEDLEY_STATIC_CAST(int16_t, e0) - ); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; - r_.neon_u16 = vld1_u16(v); -#else - r_.u16[0] = e0; - r_.u16[1] = e1; - r_.u16[2] = e2; - r_.u16[3] = e3; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32( - HEDLEY_STATIC_CAST(int32_t, e1), - HEDLEY_STATIC_CAST(int32_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; - r_.neon_u32 = vld1_u32(v); -#else - r_.u32[0] = e0; - r_.u32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi32 (int32_t e1, int32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32(e1, e0); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; - r_.neon_i32 = vld1_s32(v); -#else - r_.i32[0] = e0; - r_.i32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pi64 (int64_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; - r_.neon_i64 = vld1_s64(v); -#else - r_.i64[0] = e0; -#endif - - return simde__m64_from_private(r_); -} - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; - r_.neon_f32 = vld1_f32(v); -#else - r_.f32[0] = e0; - r_.f32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi8 (int8_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi8(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i8 = vmov_n_s8(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi8(a, a, a, a, a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi16 (int16_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi16(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i16 = vmov_n_s16(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi16(a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi32 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi32(a); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i32 = vmov_n_s32(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi16(e3, e2, e1, e0); - #else - return simde_mm_set_pi16(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi32 (int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi32(e1, e0); - #else - return simde_mm_set_pi32(e0, e1); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setzero_si64 (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setzero_si64(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_u32 = vmov_n_u32(0); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(0, 0); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_si64() simde_mm_setzero_si64() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_load_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_loadu_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(mem_addr, &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_setone_si64 (void) { - return simde_mm_set1_pi32(~INT32_C(0)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) 
- return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) -# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) -# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psllh_s(a_.mmi_i16, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) -# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi32(a, count); - #else - simde__m64_private r_; - 
simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) -# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_slli_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); - #else - r_.u64[0] = a_.u64[0] << count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) -# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 << count_.i64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] << count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) -# define _m_psllq(a, count) simde_mm_sll_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) - return simde_mm_setzero_si64(); - - r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { - r_.u16[i] = a_.u16[i] >> count_.u64[0]; - } - 
#endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) -# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { - r_.u32[i] = a_.u32[i] >> count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) -# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) -# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) -# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_si64(a, count); 
- #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> count; - #else - r_.u64[0] = a_.u64[0] >> count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) -# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 >> count_.u64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] >> count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) -# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrah_s(a_.mmi_i16, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) -# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psraw_s(a_.mmi_i32, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) 
-# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) -# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int32_t cnt = (count_.u64[0] > 31) ? 31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) -# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) -# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); - #elif 
defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) -# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) -# define _m_psubd(a, b) simde_mm_sub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { - r_.i8[i] = INT8_MIN; - } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) -# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const int32_t x = a_.u8[i] - b_.u8[i]; - if (x < 0) { - r_.u8[i] = 0; - } else if (x > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) -# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { - r_.i16[i] = SHRT_MIN; - } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) -# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - const int x = a_.u16[i] - b_.u16[i]; - if (x < 0) { - r_.u16[i] = 0; - } else if (x > UINT16_MAX) { - r_.u16[i] = UINT16_MAX; - } else { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) -# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); - #else - r_.i8[0] = a_.i8[4]; - r_.i8[1] = b_.i8[4]; - r_.i8[2] = a_.i8[5]; - r_.i8[3] = b_.i8[5]; - r_.i8[4] = a_.i8[6]; - r_.i8[5] = b_.i8[6]; - r_.i8[6] = a_.i8[7]; - r_.i8[7] = b_.i8[7]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) -# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); - #else - r_.i16[0] = a_.i16[2]; - r_.i16[1] = b_.i16[2]; - r_.i16[2] = a_.i16[3]; - r_.i16[3] = b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) -# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[1]; - r_.i32[1] = b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) -# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); - #else - r_.i8[0] = a_.i8[0]; - r_.i8[1] = b_.i8[0]; - r_.i8[2] = a_.i8[1]; - r_.i8[3] = b_.i8[1]; - r_.i8[4] = a_.i8[2]; - r_.i8[5] = b_.i8[2]; - r_.i8[6] = a_.i8[3]; - r_.i8[7] = b_.i8[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) -# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = 
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); - #else - r_.i16[0] = a_.i16[0]; - r_.i16[1] = b_.i16[0]; - r_.i16[2] = a_.i16[1]; - r_.i16[3] = b_.i16[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) -# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); - #else - r_.i32[0] = a_.i32[0]; - r_.i32[1] = b_.i32[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) -# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_xor_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - r_.u64[0] = a_.u64[0] ^ b_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) -# define _m_pxor(a, b) simde_mm_xor_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_m_to_int (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _m_to_int(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _m_to_int(a) simde_m_to_int(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_MMX_H) */ -/* :: End simde/x86/mmx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-f16.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do 
so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if !defined(SIMDE_FLOAT16_H) -#define SIMDE_FLOAT16_H - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* Portable version which should work on pretty much any compiler. - * Obviously you can't rely on compiler support for things like - * conversion to/from 32-bit floats, so make sure you always use the - * functions and macros in this file! - * - * The portable implementations are (heavily) based on CC0 code by - * Fabian Giesen: (see also - * ). - * I have basically just modified it to get rid of some UB (lots of - * aliasing, right shifting a negative value), use fixed-width types, - * and work in C. */ -#define SIMDE_FLOAT16_API_PORTABLE 1 -/* _Float16, per C standard (TS 18661-3; - * ). */ -#define SIMDE_FLOAT16_API_FLOAT16 2 -/* clang >= 6.0 supports __fp16 as an interchange format on all - * targets, but only allows you to use them for arguments and return - * values on targets which have defined an ABI. We get around the - * restriction by wrapping the __fp16 in a struct, but we can't do - * that on Arm since it would break compatibility with the NEON F16 - * functions. */ -#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 -/* This is basically __fp16 as specified by Arm, where arugments and - * return values are raw __fp16 values not structs. */ -#define SIMDE_FLOAT16_API_FP16 4 - -/* Choosing an implementation. This is a bit rough, but I don't have - * any ideas on how to improve it. If you do, patches are definitely - * welcome. */ -#if !defined(SIMDE_FLOAT16_API) - #if defined(__ARM_FP16_FORMAT_IEEE) && (defined(SIMDE_ARM_NEON_FP16) || defined(__ARM_FP16_ARGS)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 - #elif !defined(__EMSCRIPTEN__) && !(defined(__clang__) && defined(SIMDE_ARCH_POWER)) && \ - !(defined(HEDLEY_MSVC_VERSION) && defined(__clang__)) && \ - !(defined(SIMDE_ARCH_MIPS) && defined(__clang__)) && \ - !(defined(__clang__) && defined(SIMDE_ARCH_RISCV64)) && ( \ - defined(SIMDE_X86_AVX512FP16_NATIVE) || \ - (defined(SIMDE_ARCH_X86_SSE2) && HEDLEY_GCC_VERSION_CHECK(12,0,0)) || \ - (defined(SIMDE_ARCH_AARCH64) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !defined(__cplusplus)) || \ - ((defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0)) || \ - (!(defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(6,0,0))) - /* We haven't found a better way to detect this. 
It seems like defining - * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then - * checking for defined(FLT16_MAX) should work, but both gcc and - * clang will define the constants even if _Float16 is not - * supported. Ideas welcome. */ - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 - #elif defined(__FLT16_MIN__) && \ - (defined(__clang__) && \ - (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) \ - && !defined(SIMDE_ARCH_RISCV64)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI - #else - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE - #endif -#endif - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 - typedef _Float16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #if !defined(__cplusplus) - #define SIMDE_FLOAT16_C(value) value##f16 - #else - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(_Float16, (value)) - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - typedef struct { __fp16 value; } simde_float16; - #if defined(SIMDE_STATEMENT_EXPR_) && !defined(SIMDE_TESTS_H) - #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) - #else - #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) - #define SIMDE_FLOAT16_IS_SCALAR 1 - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 - typedef __fp16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - typedef struct { uint16_t value; } simde_float16; -#else - #error No 16-bit floating point API. -#endif - -#if \ - defined(SIMDE_VECTOR_OPS) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) - #define SIMDE_FLOAT16_VECTOR -#endif - -/* Reinterpret -- you *generally* shouldn't need these, they're really - * intended for internal use. However, on x86 half-precision floats - * get stuffed into a __m128i/__m256i, so it may be useful. 
*/ - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - #define SIMDE_NANHF simde_uint16_as_float16(0x7E00) // a quiet Not-a-Number - #define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) - #define SIMDE_NINFINITYHF simde_uint16_as_float16(0xFC00) -#else - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF SIMDE_FLOAT16_C(__builtin_nanf16("")) - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_FLOAT16_C(SIMDE_MATH_NAN) - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(__builtin_inf16()) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-__builtin_inf16()) - #else - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-SIMDE_MATH_INFINITY) - #endif - #else - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF __builtin_nanf16("") - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_MATH_NAN - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF __builtin_inf16() - #define SIMDE_NINFINITYHF -(__builtin_inf16()) - #else - #define SIMDE_INFINITYHF HEDLEY_STATIC_CAST(simde_float16, SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF HEDLEY_STATIC_CAST(simde_float16, -SIMDE_MATH_INFINITY) - #endif - #endif -#endif - -/* Conversion -- convert between single-precision and half-precision - * floats. */ -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float16 -simde_float16_from_float32 (simde_float32 value) { - simde_float16 res; - - #if \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) - res = HEDLEY_STATIC_CAST(simde_float16, value); - #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) - res.value = HEDLEY_STATIC_CAST(__fp16, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint32_t f32u = simde_float32_as_uint32(value); - static const uint32_t f32u_infty = UINT32_C(255) << 23; - static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; - static const uint32_t denorm_magic = - ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; - uint16_t f16u; - - uint32_t sign = f32u & (UINT32_C(1) << 31); - f32u ^= sign; - - /* NOTE all the integer compares in this function cast the operands - * to signed values to help compilers vectorize to SSE2, which lacks - * unsigned comparison instructions. This is fine since all - * operands are below 0x80000000 (we clear the sign bit). */ - - if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ - f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ - } else { /* (De)normalized number or zero */ - if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ - /* use a magic value to align our 10 mantissa bits at the bottom of - * the float. as long as FP addition is round-to-nearest-even this - * just works. */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); - - /* and one integer subtract of the bias later, we have our final float! 
*/ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); - } else { - uint32_t mant_odd = (f32u >> 13) & 1; - - /* update exponent, rounding bias part 1 */ - f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); - /* rounding bias part 2 */ - f32u += mant_odd; - /* take the bits! */ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); - } - } - - f16u |= sign >> 16; - res = simde_uint16_as_float16(f16u); - #endif - - return res; -} - -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float32 -simde_float16_to_float32 (simde_float16 value) { - simde_float32 res; - - #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) - res = HEDLEY_STATIC_CAST(simde_float32, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint16_t half = simde_float16_as_uint16(value); - const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); - const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ - uint32_t f32u; - - f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ - uint32_t exp = shifted_exp & f32u; /* just the exponent */ - f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ - - /* handle exponent special cases */ - if (exp == shifted_exp) /* Inf/NaN? */ - f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ - else if (exp == 0) { /* Zero/Denormal? */ - f32u += (1) << 23; /* extra exp adjust */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ - } - - f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ - res = simde_uint32_as_float32(f32u); - #endif - - return res; -} - -#ifdef SIMDE_FLOAT16_C - #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) -#else - #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) -#endif - -#if !defined(simde_isinfhf) && defined(simde_math_isinff) - #define simde_isinfhf(a) simde_math_isinff(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnanhf) && defined(simde_math_isnanf) - #define simde_isnanhf(a) simde_math_isnanf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnormalhf) && defined(simde_math_isnormalf) - #define simde_isnormalhf(a) simde_math_isnormalf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_issubnormalhf) && defined(simde_math_issubnormalf) - #define simde_issubnormalhf(a) simde_math_issubnormalf(simde_float16_to_float32(a)) -#endif - -#define simde_fpclassifyhf(a) simde_math_fpclassifyf(simde_float16_to_float32(a)) - -static HEDLEY_INLINE -uint8_t -simde_fpclasshf(simde_float16 v, const int imm8) { - uint16_t bits = simde_float16_as_uint16(v); - uint8_t negative = (bits >> 15) & 1; - uint16_t const ExpMask = 0x7C00; // [14:10] - uint16_t const MantMask = 0x03FF; // [9:0] - uint8_t exponent_all_ones = ((bits & ExpMask) == ExpMask); - uint8_t exponent_all_zeros = ((bits & ExpMask) == 0); - uint8_t mantissa_all_zeros = ((bits & MantMask) == 0); - uint8_t zero = exponent_all_zeros & mantissa_all_zeros; - uint8_t signaling_bit = (bits >> 9) & 1; - - uint8_t result = 0; - uint8_t snan = exponent_all_ones & (!mantissa_all_zeros) & (!signaling_bit); - uint8_t qnan = exponent_all_ones & (!mantissa_all_zeros) & signaling_bit; - uint8_t positive_zero = (!negative) & zero; - uint8_t negative_zero = negative & zero; - uint8_t positive_infinity = (!negative) & exponent_all_ones & mantissa_all_zeros; - uint8_t negative_infinity = negative & exponent_all_ones & mantissa_all_zeros; - uint8_t 
denormal = exponent_all_zeros & (!mantissa_all_zeros); - uint8_t finite_negative = negative & (!exponent_all_ones) & (!zero); - result = (((imm8 >> 0) & qnan) | \ - ((imm8 >> 1) & positive_zero) | \ - ((imm8 >> 2) & negative_zero) | \ - ((imm8 >> 3) & positive_infinity) | \ - ((imm8 >> 4) & negative_infinity) | \ - ((imm8 >> 5) & denormal) | \ - ((imm8 >> 6) & finite_negative) | \ - ((imm8 >> 7) & snan)); - return result; -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_FLOAT16_H) */ -/* :: End simde/simde-f16.h :: */ - -#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) - #define NOMINMAX - #include -#endif - -#if defined(__ARM_ACLE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_ALIGN_TO_16 __m128 n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v16i8 lsx_i8; - v8i16 lsx_i16; - v4i32 lsx_i32; - v2i64 lsx_i64; - v16u8 lsx_u8; - v8u16 lsx_u16; - v4u32 lsx_u32; - v2u64 lsx_u64; - v4f32 lsx_f32; - v2f64 lsx_f64; - #endif -} simde__m128_private; - -#if defined(SIMDE_X86_SSE_NATIVE) - typedef __m128 simde__m128; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef float32x4_t simde__m128; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; -#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - typedef v4f32 simde__m128; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128_private simde__m128; -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - typedef simde__m128 __m128; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde__m128_from_private(simde__m128_private v) { - simde__m128 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128_private -simde__m128_to_private(simde__m128 v) { - simde__m128_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(float) - simde__m128_to_altivec_f32(simde__m128 value) { - simde__m128_private r_ = simde__m128_to_private(value); - return r_.altivec_f32; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { - simde__m128_private r_; - r_.altivec_f32 = value; - return simde__m128_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); -#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ - -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) -#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ - -enum { - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, - SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, - SIMDE_MM_ROUND_UP = _MM_ROUND_UP, - SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO - #else - SIMDE_MM_ROUND_NEAREST = 0x0000, - SIMDE_MM_ROUND_DOWN = 0x2000, - SIMDE_MM_ROUND_UP = 0x4000, - SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 - #endif -}; -#if defined(_MM_ROUND_MASK) -# define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK -#else -# define SIMDE_MM_ROUND_MASK (0x6000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK -#endif - -#if defined(_MM_FROUND_TO_NEAREST_INT) -# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT -# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF -# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF -# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO -# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION - -# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC -# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC -#else -# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 -# define 
SIMDE_MM_FROUND_TO_NEG_INF 0x01 -# define SIMDE_MM_FROUND_TO_POS_INF 0x02 -# define SIMDE_MM_FROUND_TO_ZERO 0x03 -# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 - -# define SIMDE_MM_FROUND_RAISE_EXC 0x00 -# define SIMDE_MM_FROUND_NO_EXC 0x08 -#endif - -#define SIMDE_MM_FROUND_NINT \ - (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_FLOOR \ - (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_CEIL \ - (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_TRUNC \ - (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_RINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_NEARBYINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) - -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) -# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT -# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF -# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF -# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO -# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION -# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC -# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT -# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR -# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL -# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC -# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT -# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT -#endif - -#if defined(_MM_EXCEPT_INVALID) -# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID -#else -# define SIMDE_MM_EXCEPT_INVALID (0x0001) -#endif -#if defined(_MM_EXCEPT_DENORM) -# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM -#else -# define SIMDE_MM_EXCEPT_DENORM (0x0002) -#endif -#if defined(_MM_EXCEPT_DIV_ZERO) -# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO -#else -# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) -#endif -#if defined(_MM_EXCEPT_OVERFLOW) -# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW -#else -# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) -#endif -#if defined(_MM_EXCEPT_UNDERFLOW) -# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW -#else -# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) -#endif -#if defined(_MM_EXCEPT_INEXACT) -# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT -#else -# define SIMDE_MM_EXCEPT_INEXACT (0x0020) -#endif -#if defined(_MM_EXCEPT_MASK) -# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK -#else -# define SIMDE_MM_EXCEPT_MASK \ - (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ - SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ - SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID - #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM - #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO - #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW - #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW - #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT - #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK -#endif - -#if defined(_MM_MASK_INVALID) -# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID -#else -# define SIMDE_MM_MASK_INVALID (0x0080) -#endif -#if defined(_MM_MASK_DENORM) -# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM -#else -# define SIMDE_MM_MASK_DENORM (0x0100) -#endif -#if defined(_MM_MASK_DIV_ZERO) -# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO -#else -# define 
SIMDE_MM_MASK_DIV_ZERO (0x0200) -#endif -#if defined(_MM_MASK_OVERFLOW) -# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW -#else -# define SIMDE_MM_MASK_OVERFLOW (0x0400) -#endif -#if defined(_MM_MASK_UNDERFLOW) -# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW -#else -# define SIMDE_MM_MASK_UNDERFLOW (0x0800) -#endif -#if defined(_MM_MASK_INEXACT) -# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT -#else -# define SIMDE_MM_MASK_INEXACT (0x1000) -#endif -#if defined(_MM_MASK_MASK) -# define SIMDE_MM_MASK_MASK _MM_MASK_MASK -#else -# define SIMDE_MM_MASK_MASK \ - (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ - SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ - SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID - #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM - #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO - #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW - #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW - #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT - #define _MM_MASK_MASK SIMDE_MM_MASK_MASK -#endif - -#if defined(_MM_FLUSH_ZERO_MASK) -# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK -#else -# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_ON) -# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON -#else -# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_OFF) -# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF -#else -# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK - #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON - #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_ROUNDING_MODE(void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _MM_GET_ROUNDING_MODE(); - #elif defined(SIMDE_HAVE_FENV_H) - unsigned int vfe_mode; - - switch (fegetround()) { - #if defined(FE_TONEAREST) - case FE_TONEAREST: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case FE_TOWARDZERO: - vfe_mode = SIMDE_MM_ROUND_DOWN; - break; - #endif - - #if defined(FE_UPWARD) - case FE_UPWARD: - vfe_mode = SIMDE_MM_ROUND_UP; - break; - #endif - - #if defined(FE_DOWNWARD) - case FE_DOWNWARD: - vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; - break; - #endif - - default: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - } - - return vfe_mode; - #else - return SIMDE_MM_ROUND_NEAREST; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_ROUNDING_MODE(a); - #elif defined(SIMDE_HAVE_FENV_H) - int fe_mode = FE_TONEAREST; - - switch (a) { - #if defined(FE_TONEAREST) - case SIMDE_MM_ROUND_NEAREST: - fe_mode = FE_TONEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case SIMDE_MM_ROUND_TOWARD_ZERO: - fe_mode = FE_TOWARDZERO; - break; - #endif - - #if defined(FE_DOWNWARD) - case SIMDE_MM_ROUND_DOWN: - fe_mode = FE_DOWNWARD; - break; - #endif - - #if defined(FE_UPWARD) - case SIMDE_MM_ROUND_UP: - fe_mode = FE_UPWARD; - break; - #endif - - default: - return; - } - - fesetround(fe_mode); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. 
*/ - #if \ - defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_f32 = vrndiq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndnq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndmq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); - #elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndpq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); - #elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndq_f32(a_.neon_f32); 
- #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); - #elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) -#else - #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps(e3, e2, e1, e0); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; - r_.neon_f32 = vld1q_f32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps1 (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps1(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - (void) a; - return vec_splats(a); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - return (simde__m128)__lsx_vldrepl_w(&a, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_splat(a); - #else - return simde_mm_set_ps(a, a, a, a); - #endif -} -#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps1(a) simde_mm_set_ps1(a) -# define _mm_set1_ps(a) simde_mm_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_move_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_move_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; - r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); - #else - r_.f32[0] = b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_broadcastlow_ps(simde__m128 a) { - /* This function broadcasts the first element in the inpu vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_ss functions since there may be garbage in the upper lanes. */ - - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_shuffle_ps(a, a, 0); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - // the upper values in the result must be the remnants of . 
- r_.neon_f32 = vaddq_f32(a_.neon_f32, value); - #else - r_.f32[0] = a_.f32[0] + b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_and_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_and_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_andnot_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_xor_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_xor_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_or_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_or_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_not_ps(simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* Note: we use ints instead of floats because we don't want cmpeq - * to return false for (NaN, NaN) */ - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - mask_ = simde__m128_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint32_t wa SIMDE_VECTOR(16); - uint32_t wb SIMDE_VECTOR(16); - uint32_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) -# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint16_t wa SIMDE_VECTOR(16); - uint16_t wb SIMDE_VECTOR(16); - uint16_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) -# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_abs_ps(simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - simde_float32 mask_; - uint32_t u32_ = UINT32_C(0x7FFFFFFF); - simde_memcpy(&mask_, &u32_, sizeof(u32_)); - return _mm_and_ps(_mm_set1_ps(mask_), a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vabsq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_abs(a_.altivec_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpge_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpge_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpgt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpgt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vandq_u32(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpunord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpunord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] == b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] >= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] > b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] <= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] < b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] != b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { - simde__m128_private - r_, - dest_ = simde__m128_to_private(dest), - src_ = simde__m128_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); - r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t sign_pos = wasm_f32x4_splat(-0.0f); - r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); - #else - r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); - r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; - r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); - #elif defined(SIMDE_IEEE754_STORAGE) - (void) src_; - (void) dest_; - simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); - r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { - return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_pi2ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); - #else - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_si2ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - r_.i32[1] = a_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvt_ss2si (simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_ss2si(a); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); - #else - simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.i16[i]; - r_.f32[i] = v; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32x2_ps(a, b); - #else - simde__m128_private r_; - simde__m64_private - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); - SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); - #else - r_.f32[0] = (simde_float32) a_.i32[0]; - r_.f32[1] = (simde_float32) a_.i32[1]; - r_.f32[2] = (simde_float32) b_.i32[0]; - r_.f32[3] = (simde_float32) b_.i32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_cvtpi8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); - r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); - r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi16 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi16(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi32(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi8 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi8(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) - /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to - * i16, combine with an all-zero vector of i16 (which will become the upper - * half), narrow to i8. 
*/ - float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); - float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); - float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); - r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) - r_.i8[i] = INT8_MAX; - else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) - r_.i8[i] = INT8_MIN; - else - r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); - } - /* Note: the upper half is undefined */ - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.u16[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtsi32_ss(a, b); - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_ss(a, b); - #else - return _mm_cvtsi64x_ss(a, b); - #endif - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - 
return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm_cvtss_f32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtss_f32(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_f32(a_.neon_f32, 0); - #else - return a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtss_si32 (simde__m128 a) { - return simde_mm_cvt_ss2si(a); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtss_si64(a); - #else - return _mm_cvtss_si64x(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); - #else - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) -# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtt_ss2si (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtt_ss2si(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - simde_float32 v = a_.f32[0]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, v); - #endif - #endif - #endif -} -#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) -# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) - #if defined(__PGI) - return _mm_cvttss_si64x(a); - #else - return _mm_cvttss_si64(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); - float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); - r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) - r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = a_.f32[0] / b_.f32[0]; - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_mm_extract_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private a_ = simde__m64_to_private(a); - return a_.i16[imm8]; -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) -#endif -#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) -# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private - a_ = simde__m64_to_private(a); - - a_.i16[imm8] = i; - - return simde__m64_from_private(a_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) -#endif -#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps(mem_addr); -#else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); - #endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load1_ps (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps1(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_dup_f32(mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); - #else - r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ss (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ss(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); - #else - r_.f32[0] = *mem_addr; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); - #else - simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -/* The SSE documentation says that there are no alignment requirements - for mem_addr. Unfortunately they used the __m64 type for the argument - which is supposed to be 8-byte aligned, so some compilers (like clang - with -Wcast-align) will generate a warning if you try to cast, say, - a simde_float32* to a simde__m64* for this function. - - I think the choice of argument type is unfortunate, but I do think we - need to stick to it here. 
If there is demand I can always add something - like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vld1_f32( - HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); - #else - simde__m64_private b_; - simde_memcpy(&b_, mem_addr, sizeof(b_)); - r_.i32[0] = b_.i32[0]; - r_.i32[1] = b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadr_ps(mem_addr); - #else - simde__m128_private - r_, - v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrev64q_f32(v_.neon_f32); - r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_reve(v_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); - #else - r_.f32[0] = v_.f32[3]; - r_.f32[1] = v_.f32[2]; - r_.f32[2] = v_.f32[1]; - r_.f32[3] = v_.f32[0]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadu_ps(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m64_private - a_ = simde__m64_to_private(a), - mask_ = simde__m64_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) - if (mask_.i8[i] < 0) - mem_addr[i] = a_.i8[i]; - #endif -} -#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) -# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) - r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); - #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) - r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) -# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) -# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - #if defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); - #else - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); - #endif - #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); - r_.f32 = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.f32), - ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | - (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) - ) - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) -# define _m_pminub(a, b) simde_mm_min_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movehl_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vzip2q_u64(b_.neon_u64, a_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a32 = vget_high_f32(a_.neon_f32); - float32x2_t b32 = vget_high_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(b32, a32); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergel(b_.altivec_i64, a_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); - #else - r_.f32[0] = b_.f32[2]; - r_.f32[1] = b_.f32[3]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movelh_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = 
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] == b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] == b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] >= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] >= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] > b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] > b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] <= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] <= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] < b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] < b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] != b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] != b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) -# if defined(__has_builtin) -# if __has_builtin(__builtin_ia32_undef128) -# define SIMDE_HAVE_UNDEFINED128 -# endif -# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) -# define SIMDE_HAVE_UNDEFINED128 -# endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpackhi_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_high_f32(a_.neon_f32); - float32x2_t b1 = vget_high_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = 
__lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpacklo_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_low_f32(a_.neon_f32); - float32x2_t b1 = vget_low_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) || \ - defined(SIMDE_VECTOR_SUBSCRIPT)) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private a_ = simde__m64_to_private(a); - vst1_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), a_.neon_i64); - #else - simde__m64_private* - dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), - a_ = simde__m64_to_private(a); - - dest->i64[0] = a_.i64[0]; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || defined(SIMDE_LOONGARCH_LSX_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_ASSUME_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_ps(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_ps(mem_addr, a) 
simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ - row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - } while (0) -#else - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ - SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ - row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ - row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ - row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ - row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ - } while (0) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE_H) */ -/* :: End simde/x86/sse.h :: */ -#if !defined(SIMDE_X86_AVX_H) -#define SIMDE_X86_AVX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_SSE4_2_H) -#define SIMDE_X86_SSE4_2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#if !defined(SIMDE_X86_SSE4_1_H) -#define SIMDE_X86_SSE4_1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/ssse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSSE3_H) -#define SIMDE_X86_SSSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSE3_H) -#define SIMDE_X86_SSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. 
Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - * 2017 Hasindu Gamaarachchi - * 2018 Jeff Daily - */ - -#if !defined(SIMDE_X86_SSE2_H) -#define SIMDE_X86_SSE2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128i n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - #if defined(__ARM_FP16_FORMAT_IEEE) - SIMDE_ALIGN_TO_16 float16x8_t neon_f16; - #endif - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - 
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128i_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128d n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 
msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128d_private; - -#if defined(SIMDE_X86_SSE2_NATIVE) - typedef __m128i simde__m128i; - typedef __m128d simde__m128d; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int64x2_t simde__m128i; -# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - typedef float64x2_t simde__m128d; -# elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -# else - typedef simde__m128d_private simde__m128d; -# endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128i; - typedef v128_t simde__m128d; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; - #else - typedef simde__m128d_private simde__m128d; - #endif -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128i_private simde__m128i; - typedef simde__m128d_private simde__m128d; -#endif - -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - typedef simde__m128i __m128i; - typedef simde__m128d __m128d; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde__m128i_from_private(simde__m128i_private v) { - simde__m128i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i_private -simde__m128i_to_private(simde__m128i v) { - simde__m128i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde__m128d_from_private(simde__m128d_private v) { - simde__m128d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d_private -simde__m128d_to_private(simde__m128d v) { - simde__m128d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, 
f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(double) - simde__m128d_to_altivec_f64(simde__m128d value) { - simde__m128d_private r_ = simde__m128d_to_private(value); - return r_.altivec_f64; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { - simde__m128d_private r_; - r_.altivec_f64 = value; - return simde__m128d_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) - #endif - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_pd(e1, e0); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make(e0, e1); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; - r_.neon_f64 = vld1q_f64(data); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_pd(a); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_pd(a) simde_mm_set1_pd(a) - #define _mm_set_pd1(a) simde_mm_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_abs_pd(simde__m128d a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - simde_float64 mask_; - uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); - simde_memcpy(&mask_, &u64_, sizeof(u64_)); - return _mm_and_pd(_mm_set1_pd(mask_), a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vabsq_f64(a_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_abs(a_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_not_pd(simde__m128d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castpd_si128(a); - return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - mask_ = simde__m128d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 + b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] + b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_add_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] + b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_move_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(HEDLEY_IBM_VERSION) - r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); - #else - r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); - #else - r_.f64[0] = b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_broadcastlow_pd(simde__m128d a) { - /* This function broadcasts the first element in the input vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_sd functions since there may be garbage in the upper lanes. 
*/ - - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[0]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_add_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] + b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] + b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_and_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_and_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_and_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_and_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_andnot_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = ~a_.u64[i] & b_.u64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_andnot_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_xor_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - 
#endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) - uint16_t wa SIMDE_VECTOR(32); - uint16_t wb SIMDE_VECTOR(32); - uint16_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) - uint32_t wa SIMDE_VECTOR(32); - uint32_t wb SIMDE_VECTOR(32); - uint32_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setzero_si128 (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_si128(); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vdupq_n_s32(0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT) - r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = 0; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} 
[Abridged: the diff continues deleting the vendored SIMDE SSE2 emulation header. This hunk removes the native alias for _mm_setzero_si128 and the portable 128-bit byte-shift intrinsics simde_mm_bslli_si128 / simde_mm_bsrli_si128 (which back _mm_slli_si128 and _mm_srli_si128), each with x86-native, NEON, WASM SIMD128, AltiVec/z13-vector, and scalar-loop fallbacks, plus the simde_mm_clflush stub.]
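For readers who do not want to dig through SIMDE itself, the following is a minimal sketch of what the deleted scalar byte-shift fallback computed; the helper name and signature are invented for illustration and are not part of SIMDE or of this package.

    // Sketch of the portable fallback behind _mm_slli_si128: shift the 16
    // bytes of a 128-bit value up by imm8 bytes, zero-filling from below.
    // Shifts of 16 or more bytes yield all zeros, as in the deleted code.
    #include <cstdint>
    #include <cstring>

    static inline void byte_shift_left_128(uint8_t out[16],
                                           const uint8_t in[16], int imm8) {
        std::memset(out, 0, 16);                 // default: everything shifted out
        if (imm8 < 16) {
            for (int i = imm8; i < 16; i++) {    // byte i of the result takes
                out[i] = in[i - imm8];           // byte i - imm8 of the input
            }
        }
    }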
[Abridged: removal of the scalar double-precision comparisons simde_mm_comieq_sd, comige_sd, comigt_sd, comile_sd, comilt_sd, and comineq_sd (each comparing lane 0 via x86-native, NEON A64, WASM, or plain C paths), the sign-manipulation helpers simde_x_mm_copysign_pd and simde_x_mm_xorsign_pd, and the bit-cast functions simde_mm_castpd_ps, castpd_si128, castps_pd, and castps_si128.]
[Abridged: removal of the remaining bit-casts (simde_mm_castsi128_pd, castsi128_ps) and of the element-wise comparison family: simde_mm_cmpeq / cmplt / cmpgt for epi8, epi16, and epi32; cmpeq, cmpneq, cmplt, cmple, cmpgt, and cmpge for pd and sd; and the negated forms cmpngt, cmpnge, cmpnlt, and cmpnle, whose fallback paths simply call the complementary comparison. Each function carries x86-native, NEON, WASM, AltiVec/zvector, compiler vector-extension, and scalar-loop fallbacks, all producing per-lane all-ones / all-zeros masks.]
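The one idea shared by every comparison above is the mask convention. The sketch below mirrors the deleted scalar path of simde_mm_cmpeq_epi8; the function name is invented for illustration and nothing here is retained in the package.

    // Sketch of the SSE2 compare convention: each result lane is all-ones
    // (0xFF) when the predicate holds and all-zeros otherwise, so the mask
    // can later be AND-ed or blended with other vectors.
    #include <cstdint>

    static inline void cmpeq_epi8_scalar(int8_t r[16],
                                         const int8_t a[16], const int8_t b[16]) {
        for (int i = 0; i < 16; i++) {
            r[i] = (a[i] == b[i]) ? ~INT8_C(0) : INT8_C(0);  // 0xFF on match, 0x00 otherwise
        }
    }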
[Abridged: removal of the ordered/unordered comparisons simde_mm_cmpord_pd, cmpord_sd, cmpunord_pd, and cmpunord_sd (which detect NaN operands via self-equality on NEON/WASM and isnan checks in the portable loop), simde_mm_cvtsd_f64 (extract lane 0 as a double), and the first of the conversion routines: simde_mm_cvtepi32_pd, cvtepi32_ps, and cvtpd_pi32.]
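The ordered/unordered comparisons rely on one floating-point fact: a NaN never compares equal to itself, so a pair of doubles is "ordered" exactly when both operands equal themselves. A minimal sketch of that logic follows; the helper names are hypothetical, not the deleted implementation verbatim.

    // Sketch of the cmpord / cmpunord fallback logic for one double lane.
    #include <cstdint>
    #include <cmath>

    static inline uint64_t ordered_mask(double a, double b) {
        // all-ones when neither operand is NaN, all-zeros otherwise
        return (!std::isnan(a) && !std::isnan(b)) ? ~UINT64_C(0) : UINT64_C(0);
    }

    static inline uint64_t unordered_mask(double a, double b) {
        // exact complement: all-ones when at least one operand is NaN
        return (std::isnan(a) || std::isnan(b)) ? ~UINT64_C(0) : UINT64_C(0);
    }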
[Abridged: removal of the conversion family: simde_mm_cvtpd_epi32, cvtpd_ps, cvtpi32_pd, cvtps_epi32, cvtps_pd, cvtsd_si32, cvtsd_si64, cvtsd_ss, simde_x_mm_cvtsi128_si16, cvtsi128_si32, cvtsi128_si64, cvtsi32_sd, simde_x_mm_cvtsi16_si128, cvtsi32_si128, cvtsi64_sd, cvtsi64_si128, cvtss_sd, and the truncating cvttpd_pi32 / cvttpd_epi32. The rounding conversions guard against out-of-range inputs unless SIMDE_FAST_CONVERSION_RANGE is defined.]
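The guarded conversions above all follow the same pattern, sketched below for a single double-to-int32 lane. The helper name is invented; the INT32_MIN result for out-of-range or NaN inputs mirrors both the deleted fallback and x86's "integer indefinite" value.

    // Sketch of the range-guarded double -> int32 conversion used by the
    // deleted cvt* fallbacks: round to nearest, but collapse out-of-range
    // and NaN inputs to INT32_MIN instead of invoking undefined behaviour.
    #include <cstdint>
    #include <cmath>

    static inline int32_t cvt_f64_to_i32(double v) {
        if (!(v > static_cast<double>(INT32_MIN) &&
              v < static_cast<double>(INT32_MAX))) {
            return INT32_MIN;                       // out of range, or NaN
        }
        return static_cast<int32_t>(std::round(v)); // round-to-nearest, then convert
    }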
*/ - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = - vandq_u32( - vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), - vceqq_f32(a_.neon_f32, a_.neon_f32) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); - #endif - - r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - v128_t valid_input = - wasm_v128_and( - wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), - wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); - #endif - - r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); - #endif - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; - - __typeof__(r_.i32) valid_input = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.i32), - (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) - ); - #elif !defined(SIMDE_FAST_NANS) - __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); - #endif - - __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; - r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); - #endif - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvttsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvttsd_si64(a); - #else - return _mm_cvttsd_si64x(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); - #endif -} -#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) - #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] / b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - uint16_t r; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); - #else - r = a_.u16[imm8 & 7]; - #endif - - return HEDLEY_STATIC_CAST(int32_t, r); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) - #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m128i_private a_ = simde__m128i_to_private(a); - a_.i16[imm8 & 7] = i; - return simde__m128i_from_private(a_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load1_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load1_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); - #else - return simde_mm_set1_pd(*mem_addr); - #endif -} -#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) - #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_sd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_sd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = UINT64_C(0); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_load_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadh_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); - #else - simde_float64 t; - - simde_memcpy(&t, mem_addr, sizeof(t)); - r_.f64[0] = a_.f64[0]; - r_.f64[1] = t; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_epi64(mem_addr); - #else - simde__m128i_private r_; - - int64_t value; - simde_memcpy(&value, mem_addr, sizeof(value)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); - #else - r_.i64[0] = value; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vld1_f64( - HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = a_.u64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadr_pd(mem_addr); - #else - simde__m128d_private - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); - r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t tmp = 
wasm_v128_load(mem_addr); - r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); - #else - r_.f64[0] = mem_addr[1]; - r_.f64[1] = mem_addr[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld1q_f64(mem_addr); - #else - simde__m128d_private r_; - - simde_memcpy(&r_, mem_addr, sizeof(r_)); - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi8 - #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi16 - #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi32(void const * mem_addr) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi32 - #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi64 - #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si128 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); - #else - simde__m128i_private r_; - - #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_PACKED_ - struct simde_mm_loadu_si128_s { - __typeof__(r_) v; - } __attribute__((__packed__, __may_alias__)); - r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_madd_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpaddq_s32(pl, ph); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); - int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); - int32x2_t rh = 
vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); - r_.neon_i32 = vcombine_s32(rl, rh); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) a32, b32, p32; - SIMDE_CONVERT_VECTOR_(a32, a_.i16); - SIMDE_CONVERT_VECTOR_(b32, b_.i16); - p32 = a32 * b32; - r_.i32 = - __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + - __builtin_shufflevector(p32, p32, 1, 3, 5, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - if (mask_.u8[i] & 0x80) { - mem_addr[i] = a_.i8[i]; - } - } - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) - /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ - return _mm_movemask_epi8(a); - #else - int32_t r = 0; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ - static const uint8_t md[16] = { - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - }; - - /* Extend sign bit over entire lane */ - uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); - /* Clear all but the bit we're interested in. 
*/ - uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); - /* Alternate bytes from low half and high half */ - uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); - uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vaddvq_u16(x); - #else - uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[15 - i] >> 7) << (15 - i); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_movemask_pd(a); - #else - int32_t r = 0; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + - (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 
-simde_mm_movepi64_pi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movepi64_pi64(a); - #else - simde__m64_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i64 = vget_low_s64(a_.neon_i64); - #else - r_.i64[0] = a_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_movpi64_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movpi64_epi64(a); - #else - simde__m128i_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_move_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_move_epi64(a) simde_mm_move_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t a_lo = vmovn_u64(a_.neon_u64); - uint32x2_t b_lo = vmovn_u64(b_.neon_u64); - r_.neon_u64 = vmull_u32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( - wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), - wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(a_.u32) z = { 0, }; - a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); - b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * - HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 * b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] * b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i64 = a_.i64 % b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_pd(a, b); - #else - simde__m128d_private - r_, 
- a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] * b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_mul_su32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); - #else - r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mulhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a3210 = vget_low_s16(a_.neon_i16); - int16x4_t b3210 = vget_low_s16(b_.neon_i16); - int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); - #else - int16x4_t a7654 = vget_high_s16(a_.neon_i16); - int16x4_t b7654 = vget_high_s16(b_.neon_i16); - int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); - r_.neon_u16 = rv.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); - 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_mulhi_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t a3210 = vget_low_u16(a_.neon_u16); - uint16x4_t b3210 = vget_low_u16(b_.neon_u16); - uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); - r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - #else - uint16x4_t a7654 = vget_high_u16(a_.neon_u16); - uint16x4_t b7654 = vget_high_u16(b_.neon_u16); - uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - r_.neon_u16 = neon_r.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); - r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mullo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_or_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - 
r_.i32f = a_.i32f | b_.i32f; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_or_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi16(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; - const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; - - int16_t m SIMDE_VECTOR(32); - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); - v = (v & ~m) | (min & m); - - m = v > max; - v = (v & ~m) | (max & m); - - SIMDE_CONVERT_VECTOR_(r_.i8, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; - r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi32(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; - const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; - - int32_t m SIMDE_VECTOR(32); - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); - v = (v & ~m) | (min & m); - - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); - v = (v & ~m) | (max & m); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
[Deletion hunk for the vendored SIMDe SSE2 compatibility header continues here: portable simde_mm_* / simde_x_mm_* implementations of the pack (packs/packus), pause, sad, set/set1/setr, loadu_si16/si32/si64, setzero, undefined, setone, shuffle (epi32/pd/shufflehi/shufflelo), shift (sll/sra/srl and slli/srai/srli variants), sqrt (pd/sd), store/storeu/storel/storeh/storer, stream (pd/si128/si32/si64), and sub (epi8/16/32/64, pd/sd) intrinsics, each with native x86, NEON, WASM SIMD128, AltiVec, and scalar fallback paths plus their _mm_* native-alias macros. Every line in this hunk is removed (`-`) as part of dropping the bundled SIMDe dependency; the hunk contains no project-specific logic.]
-simde__m64 -simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] - b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] == b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] == b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] >= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] >= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > 
wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] > b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] > b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] <= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] <= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] < b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] < b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] != b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] != b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_lfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_lfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_lfence() simde_mm_lfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_mfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_mfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mfence() simde_mm_mfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_high_s16(a_.neon_i16); - int16x4_t b1 = vget_high_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi16(a, b) 
simde_mm_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_high_s32(a_.neon_i32); - int32x2_t b1 = vget_high_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_h = vget_high_s64(a_.neon_i64); - int64x1_t b_h = vget_high_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_h, b_h); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi8 (simde__m128i a, 
simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i]; - r_.i8[(i * 2) + 1] = b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_low_s16(a_.neon_i16); - int16x4_t b1 = vget_low_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i]; - r_.i16[(i * 2) + 1] = b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_low_s32(a_.neon_i32); - int32x2_t b1 = vget_low_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i]; - r_.i32[(i * 2) + 1] = b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_l = vget_low_s64(a_.neon_i64); - int64x1_t b_l = vget_low_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_l, b_l); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i]; - r_.i64[(i * 2) + 1] = b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i]; - r_.f64[(i * 2) + 1] = b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_negate_pd(simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - r_.altivec_f64 = vec_neg(a_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vnegq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); - #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_not_si128 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_ternarylogic_epi32(a, a, a, 0x55); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/sse2.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i]; - r_.i16[i + halfway_point] = b_.i16[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i + 1]; - r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi32 
(simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i]; - r_.i32[i + halfway_point] = b_.i32[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i + 1]; - r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i]; - r_.f32[i + halfway_point] = b_.f32[2 * i]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i + 1]; - r_.f32[i + halfway_point] = 
b_.f32[2 * i + 1]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i]; - r_.f64[i + halfway_point] = b_.f64[2 * i]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i + 1]; - r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); - float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); - return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); - #else - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; - r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); - float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); - return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); - #else - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; - r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; - } - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_ps(a, b) simde_mm_addsub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_pd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); - #else - return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); - #else - return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_pd(a, b); - #else - return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); - #else - return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_lddqu_si128(mem_addr); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loaddup_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_loaddup_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(*mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.f64[1] = *mem_addr; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_movedup_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movedup_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - r_.f64[0] = a_.f64[0]; - r_.f64[1] = a_.f64[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehdup_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movehdup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); - #else - r_.f32[0] = a_.f32[1]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_moveldup_ps (simde__m128 a) { - #if defined(SIMDE__SSE3_NATIVE) - return _mm_moveldup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[0]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[2]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE3_H) */ -/* :: End simde/x86/sse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi8(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabsq_s8(a_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_abs(a_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / 
sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vabsq_s16(a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_abs(a_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); - return _mm_sub_epi32(_mm_xor_si128(a, m), m); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vabsq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_abs(a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_PUSH - #pragma warning(disable:4146) - #endif - r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_POP - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi8(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabs_s8(a_.neon_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? 
[Deleted along with the rest of the vendored SIMDe (simde/x86) headers: the remainder of the portable SSSE3 emulations and the start of the SSE4.1 emulations. The removed SSSE3 block covers _mm_abs_pi8/pi16/pi32, _mm_alignr_epi8 and _mm_alignr_pi8, _mm_shuffle_epi8 and _mm_shuffle_pi8, the horizontal add/subtract family (_mm_hadd_epi16/epi32/pi16/pi32, _mm_hadds_epi16/pi16, _mm_hsub_epi16/epi32/pi16/pi32, _mm_hsubs_epi16/pi16), _mm_maddubs_epi16/pi16, _mm_mulhrs_epi16/pi16, and _mm_sign_epi8/epi16/epi32 plus _mm_sign_pi8/pi16/pi32, closing with the /* :: End simde/x86/ssse3.h :: */ marker. The removed SSE4.1 block opens with _mm_blend_epi16/pd/ps, _mm_blendv_epi8/pd/ps and the internal simde_x_mm_blendv_epi16/epi32/epi64 helpers, _mm_round_pd, _mm_ceil_pd/ps/sd/ss, _mm_cmpeq_epi64, the sign- and zero-extending conversions (_mm_cvtepi8_epi16/epi32/epi64, _mm_cvtepu8_epi16/epi32/epi64, _mm_cvtepi16_epi32/epi64, _mm_cvtepu16_epi32/epi64, _mm_cvtepi32_epi64, _mm_cvtepu32_epi64), and _mm_dp_pd/_mm_dp_ps; the deleted SSE4.1 code continues past this point. Every removed function follows the same pattern: call the native x86 intrinsic when available, otherwise fall back to a NEON, AltiVec/VSX, or WASM SIMD implementation, and finally to a plain scalar loop.]
sum : SIMDE_FLOAT32_C(0.0); - } - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(HEDLEY_MCST_LCC_VERSION) - #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ - SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ - _mm_dp_ps((a), (b), (imm8)); \ - SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ - })) - #else - #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_ps - #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) -#endif - -#if defined(simde_mm_extract_epi8) -# undef simde_mm_extract_epi8 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_mm_extract_epi8 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i8, imm8); - #else - return a_.i8[imm8 & 15]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) -# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi8 - #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) -#endif - -#if defined(simde_mm_extract_epi32) -# undef simde_mm_extract_epi32 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i32, imm8); - #else - return a_.i32[imm8 & 3]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi32 - #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) -#endif - -#if defined(simde_mm_extract_epi64) -# undef simde_mm_extract_epi64 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_extract_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i64, imm8); - #else - return a_.i64[imm8 & 1]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) -#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_extract_epi64 - #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) -#endif - -#if defined(simde_mm_extract_ps) -# undef simde_mm_extract_ps -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128_private - a_ = simde__m128_to_private(a); - - return a_.i32[imm8 & 3]; -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_ps - #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_pd - #define _mm_floor_pd(a) simde_mm_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ps - #define _mm_floor_ps(a) simde_mm_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_floor) - r_.f64[0] = simde_math_floor(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_sd - #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_floor_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_floorf) - r_.f32[0] = simde_math_floorf(b_.f32[0]); - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ss - #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - /* clang-3.8 returns an incompatible type, so we need the cast. MSVC - * can't handle the cast ("error C2440: 'type cast': cannot convert - * from '__m128i' to '__m128i'"). */ - #if defined(__clang__) - #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) - #else - #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi8 - #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(__clang__) - #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) - #else - #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi32 - #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - #if defined(SIMDE_BUG_GCC_94482) - simde__m128i_private - a_ = simde__m128i_to_private(a); - - switch(imm8) { - case 0: - return simde_mm_set_epi64x(a_.i64[1], i); - break; - case 1: - return simde_mm_set_epi64x(i, a_.i64[0]); - break; - default: - HEDLEY_UNREACHABLE(); - break; - } - #else - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i64[imm8] = i; - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_insert_epi64 - #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - float tmp1_ = b_.f32[(imm8 >> 6) & 3]; - a_.f32[(imm8 >> 4) & 3] = tmp1_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_ps - #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi8(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi8 - #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi32(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi32 - #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_add_epi16(b, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu16 - #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu32 - #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi8 - #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi32 - #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu16 - #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu32 - #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_minpos_epu16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_minpos_epu16(a); - #else - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()), - a_ = simde__m128i_to_private(a); - - r_.u16[0] = UINT16_MAX; - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - if (a_.u16[i] < r_.u16[0]) { - r_.u16[0] = a_.u16[i]; - r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); - } - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_minpos_epu16 - #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - const int a_offset = imm8 & 4; - const int b_offset = (imm8 & 3) << 2; - -#if defined(simde_math_abs) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { - r_.u16[i] = - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) -# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mpsadbw_epu8 - #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mul_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // vmull_s32 upcasts instead of masking, so we downcast. 
- int32x2_t a_lo = vmovn_s64(a_.neon_i64); - int32x2_t b_lo = vmovn_s64(b_.neon_i64); - r_.neon_i64 = vmull_s32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make( - wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), - wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = - HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * - HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mul_epi32 - #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mullo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mullo_epi32 - #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 * b_.u32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] * b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_packus_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i max = _mm_set1_epi32(UINT16_MAX); - const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); - const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); - return - _mm_packs_epi32( - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) - ); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); - #else - r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = - 
vcombine_u16( - vqmovun_s32(a_.neon_i32), - vqmovun_s32(b_.neon_i32) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - - v &= ~(v >> 31); - v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_packus_epi32 - #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyint) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f64[0] = simde_math_nearbyint(b_.f64[0]); - break; - #endif - - #if defined(simde_math_floor) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f64[0] = simde_math_floor(b_.f64[0]); - break; - #endif - - #if defined(simde_math_ceil) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f64[0] = simde_math_ceil(b_.f64[0]); - break; - #endif - - #if defined(simde_math_trunc) - case SIMDE_MM_FROUND_TO_ZERO: - r_.f64[0] = simde_math_trunc(b_.f64[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) -# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_sd - #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128_private - r_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyintf) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_floorf) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f32[0] = simde_math_floorf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_ceilf) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f32[0] = simde_math_ceilf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_truncf) - case SIMDE_MM_FROUND_TO_ZERO: - 
r_.f32[0] = simde_math_truncf(b_.f32[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_ss - #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_load) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - return __builtin_nontemporal_load(mem_addr); - #else - return simde_mm_load_si128(mem_addr); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_stream_load_si128 - #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_ones (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_ones(a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; - #else - int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(&:r_) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r_ &= a_.i32f[i]; - } - - r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_ones - #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_zeros(a, mask); - #else - simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; - #else - int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(|:r_) - for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { - r_ |= tmp_.i32f[i]; - } - - r = !r_; - #endif - - return r; - #endif -} 
-#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_zeros - #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_mix_ones_zeros(a, mask); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); - int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); - return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); - long long c0 = wasm_i64x2_extract_lane(m, 0); - long long c1 = wasm_i64x2_extract_lane(m, 1); - long long ones = c0 | c1; - long long zeros = ~(c0 & c1); - return ones && zeros; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) - if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) - return 1; - - return 0; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_mix_ones_zeros - #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #else - int_fast32_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= ~a_.i32f[i] & b_.i32f[i]; - } - - return HEDLEY_STATIC_CAST(int, !r); - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_si128 - #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testnzc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); - int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !( !(vgetq_lane_s64(s641, 0) || vgetq_lane_s64(s641, 1)) \ - || !(vgetq_lane_s64(s640, 0) || vgetq_lane_s64(s640, 1)) ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ - && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) - return 1; - } - - return 0; - #endif - #endif -} -#if 
defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_si128 - #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testz_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #elif defined(SIMDE_HAVE_INT128_) - if ((a_.u128[0] & b_.u128[0]) == 0) { - return 1; - } - return 0; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if ((a_.u64[i] & b_.u64[i]) > 0) - return 0; - } - #endif - - return 1; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_si128 - #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_1_H) */ -/* :: End simde/x86/sse4.1.h :: */ - -#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS - #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS - #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS - #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS - #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY - #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES - #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH - #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED - #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY - #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY - #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT - #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT - #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK - #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK -#else - #define SIMDE_SIDD_UBYTE_OPS 0x00 - #define SIMDE_SIDD_UWORD_OPS 0x01 - #define SIMDE_SIDD_SBYTE_OPS 0x02 - #define SIMDE_SIDD_SWORD_OPS 0x03 - #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 - #define SIMDE_SIDD_CMP_RANGES 0x04 - #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 - #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c - #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 - #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 - #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 - #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 - #define SIMDE_SIDD_BIT_MASK 0x00 - #define SIMDE_SIDD_UNIT_MASK 0x40 -#endif - -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) - #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS - #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS - #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS - #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS - #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY - #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES - #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH - #define 
_SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED - #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY - #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY - #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY - #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY - #define _SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT - #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT - #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK - #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ - _mm_cmpestrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrs - #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 
16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ - _mm_cmpestrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrz - #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_cmpgt_epi64(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://stackoverflow.com/a/65175746/501126 */ - __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); - r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); - return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://stackoverflow.com/a/65223269/501126 */ - r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpgt_epi64 - #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_8_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 8) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i8[i]) - a_invalid = 1; - } - return a_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_16_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 16) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i16[i]) - a_invalid = 1; - } - return a_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrs(a, b, imm8) \ - _mm_cmpistrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrs(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? 
simde_mm_cmpistrs_16_((a)) \ - : simde_mm_cmpistrs_8_((a))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrs - #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_8_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 8) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i8[i]) - b_invalid = 1; - } - return b_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_16_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 16) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i16[i]) - b_invalid = 1; - } - return b_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrz(a, b, imm8) \ - _mm_cmpistrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrz(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? simde_mm_cmpistrz_16_((b)) \ - : simde_mm_cmpistrz_8_((b))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrz - #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u8(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cb(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc ^= v; - for(int bit = 0 ; bit < 8 ; bit++) { - if (crc & 1) - crc = (crc >> 1) ^ UINT32_C(0x82f63b78); - else - crc = (crc >> 1); - } - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u16(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32ch(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u8(crc, v & 0xff); - crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u32(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cw(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u16(crc, v & 0xffff); - crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) - return _mm_crc32_u64(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return 
__crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); - #else - uint64_t crc = prevcrc; - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_2_H) */ -/* :: End simde/x86/sse4.2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; - SIMDE_ALIGN_TO_32 simde__m128 m128[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256 n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} 
simde__m256_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; - SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256d n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256d_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 
SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_32 simde_float16 f16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 simde_float16 f16[16]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float16 f16[16]; - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; - SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256i n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256i_private; - -#if defined(SIMDE_X86_AVX_NATIVE) - typedef __m256 simde__m256; - typedef __m256i simde__m256i; - typedef __m256d simde__m256d; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; -#else - typedef simde__m256_private simde__m256; - typedef simde__m256i_private simde__m256i; - typedef simde__m256d_private simde__m256d; -#endif - -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) - typedef simde__m256 __m256; - typedef simde__m256i __m256i; - typedef simde__m256d __m256d; - #else - #undef __m256 - #define __m256 simde__m256 - #undef __m256i - #define __m256i simde__m256i - #undef __m256d - #define __m256d simde__m256d - #endif -#endif - -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); 
-HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde__m256_from_private(simde__m256_private v) { - simde__m256 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256_private -simde__m256_to_private(simde__m256 v) { - simde__m256_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde__m256i_from_private(simde__m256i_private v) { - simde__m256i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i_private -simde__m256i_to_private(simde__m256i v) { - simde__m256i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde__m256d_from_private(simde__m256d_private v) { - simde__m256d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d_private -simde__m256d_to_private(simde__m256d v) { - simde__m256d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_CMP_EQ_OQ 0 -#define SIMDE_CMP_LT_OS 1 -#define SIMDE_CMP_LE_OS 2 -#define SIMDE_CMP_UNORD_Q 3 -#define SIMDE_CMP_NEQ_UQ 4 -#define SIMDE_CMP_NLT_US 5 -#define SIMDE_CMP_NLE_US 6 -#define SIMDE_CMP_ORD_Q 7 -#define SIMDE_CMP_EQ_UQ 8 -#define SIMDE_CMP_NGE_US 9 -#define SIMDE_CMP_NGT_US 10 -#define SIMDE_CMP_FALSE_OQ 11 -#define SIMDE_CMP_NEQ_OQ 12 -#define SIMDE_CMP_GE_OS 13 -#define SIMDE_CMP_GT_OS 14 -#define SIMDE_CMP_TRUE_UQ 15 -#define SIMDE_CMP_EQ_OS 16 -#define SIMDE_CMP_LT_OQ 17 -#define SIMDE_CMP_LE_OQ 18 -#define SIMDE_CMP_UNORD_S 19 -#define SIMDE_CMP_NEQ_US 20 -#define SIMDE_CMP_NLT_UQ 21 -#define SIMDE_CMP_NLE_UQ 22 -#define SIMDE_CMP_ORD_S 23 -#define SIMDE_CMP_EQ_US 24 -#define SIMDE_CMP_NGE_UQ 25 -#define SIMDE_CMP_NGT_UQ 26 -#define SIMDE_CMP_FALSE_OS 27 -#define SIMDE_CMP_NEQ_OS 28 -#define SIMDE_CMP_GE_OQ 29 -#define SIMDE_CMP_GT_OQ 30 -#define SIMDE_CMP_TRUE_US 31 - -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) -#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ -#define _CMP_LT_OS SIMDE_CMP_LT_OS -#define _CMP_LE_OS SIMDE_CMP_LE_OS -#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q -#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ -#define _CMP_NLT_US SIMDE_CMP_NLT_US -#define _CMP_NLE_US SIMDE_CMP_NLE_US -#define _CMP_ORD_Q SIMDE_CMP_ORD_Q -#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ -#define _CMP_NGE_US SIMDE_CMP_NGE_US -#define _CMP_NGT_US SIMDE_CMP_NGT_US -#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ -#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ -#define _CMP_GE_OS SIMDE_CMP_GE_OS -#define _CMP_GT_OS SIMDE_CMP_GT_OS -#define _CMP_TRUE_UQ 
SIMDE_CMP_TRUE_UQ -#define _CMP_EQ_OS SIMDE_CMP_EQ_OS -#define _CMP_LT_OQ SIMDE_CMP_LT_OQ -#define _CMP_LE_OQ SIMDE_CMP_LE_OQ -#define _CMP_UNORD_S SIMDE_CMP_UNORD_S -#define _CMP_NEQ_US SIMDE_CMP_NEQ_US -#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ -#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ -#define _CMP_ORD_S SIMDE_CMP_ORD_S -#define _CMP_EQ_US SIMDE_CMP_EQ_US -#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ -#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ -#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS -#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS -#define _CMP_GE_OQ SIMDE_CMP_GE_OQ -#define _CMP_GT_OQ SIMDE_CMP_GT_OQ -#define _CMP_TRUE_US SIMDE_CMP_TRUE_US -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castps_pd (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps_pd(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps_pd - #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castps_si256 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps_si256(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps_si256 - #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castsi256_pd (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_pd(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_pd - #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castsi256_ps (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_ps(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_ps - #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castpd_ps (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd_ps(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd_ps - #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castpd_si256 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd_si256(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd_si256 - #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setzero_si256 (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_si256(); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_setzero_si128(); - r_.m128i[1] = simde_mm_setzero_si128(); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = 0; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_si256 - #define _mm256_setzero_si256() simde_mm256_setzero_si256() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 
-simde_mm256_setzero_ps (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_ps(); - #else - return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_ps - #define _mm256_setzero_ps() simde_mm256_setzero_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setzero_pd (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_pd(); - #else - return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_pd - #define _mm256_setzero_pd() simde_mm256_setzero_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_not_ps(simde__m256 a) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); - r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm256_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_ps(a, b, mask); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - mask_ = simde__m256_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); - r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_not_pd(simde__m256d a) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = ~a_.i64; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); - r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ~(a_.i64[i]); - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm256_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. 
- * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_pd(a, b, mask); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - mask_ = simde__m256d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); - r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_setone_si256 (void) { - simde__m256i_private r_; - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - __typeof__(r_.i32f) rv = { 0, }; - r_.i32f = ~rv; -#elif defined(SIMDE_X86_AVX2_NATIVE) - __m256i t = _mm256_setzero_si256(); - r_.n = _mm256_cmpeq_epi32(t, t); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - } -#endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_setone_ps (void) { - return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_setone_pd (void) { - return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, - int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, - int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi8( - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16); - #else - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - r_.i8[16] = e16; - r_.i8[17] = e17; - r_.i8[18] = e18; - r_.i8[19] = e19; - r_.i8[20] = e20; - r_.i8[21] = e21; - r_.i8[22] = e22; - r_.i8[23] = e23; - r_.i8[24] = e24; - r_.i8[25] = e25; - r_.i8[26] = e26; - r_.i8[27] = e27; - r_.i8[28] = e28; - r_.i8[29] = e29; - r_.i8[30] = e30; - r_.i8[31] = e31; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi8 - #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ 
- simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, - int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); - #else - r_.i16[ 0] = e0; - r_.i16[ 1] = e1; - r_.i16[ 2] = e2; - r_.i16[ 3] = e3; - r_.i16[ 4] = e4; - r_.i16[ 5] = e5; - r_.i16[ 6] = e6; - r_.i16[ 7] = e7; - r_.i16[ 8] = e8; - r_.i16[ 9] = e9; - r_.i16[10] = e10; - r_.i16[11] = e11; - r_.i16[12] = e12; - r_.i16[13] = e13; - r_.i16[14] = e14; - r_.i16[15] = e15; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi16 - #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, - int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); - #else - r_.i32[ 0] = e0; - r_.i32[ 1] = e1; - r_.i32[ 2] = e2; - r_.i32[ 3] = e3; - r_.i32[ 4] = e4; - r_.i32[ 5] = e5; - r_.i32[ 6] = e6; - r_.i32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi32 - #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi64x(e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi64x(e1, e0); - r_.m128i[1] = simde_mm_set_epi64x(e3, e2); - #else - r_.i64[0] = e0; - r_.i64[1] = e1; - r_.i64[2] = e2; - r_.i64[3] = e3; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi64x - #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, - uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, - uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, - uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, - uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, - uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, - uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m256i_private r_; - - r_.u8[ 0] = e0; - 
r_.u8[ 1] = e1; - r_.u8[ 2] = e2; - r_.u8[ 3] = e3; - r_.u8[ 4] = e4; - r_.u8[ 5] = e5; - r_.u8[ 6] = e6; - r_.u8[ 7] = e7; - r_.u8[ 8] = e8; - r_.u8[ 9] = e9; - r_.u8[10] = e10; - r_.u8[11] = e11; - r_.u8[12] = e12; - r_.u8[13] = e13; - r_.u8[14] = e14; - r_.u8[15] = e15; - r_.u8[16] = e16; - r_.u8[17] = e17; - r_.u8[18] = e18; - r_.u8[19] = e19; - r_.u8[20] = e20; - r_.u8[20] = e20; - r_.u8[21] = e21; - r_.u8[22] = e22; - r_.u8[23] = e23; - r_.u8[24] = e24; - r_.u8[25] = e25; - r_.u8[26] = e26; - r_.u8[27] = e27; - r_.u8[28] = e28; - r_.u8[29] = e29; - r_.u8[30] = e30; - r_.u8[31] = e31; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, - uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, - uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, - uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m256i_private r_; - - r_.u16[ 0] = e0; - r_.u16[ 1] = e1; - r_.u16[ 2] = e2; - r_.u16[ 3] = e3; - r_.u16[ 4] = e4; - r_.u16[ 5] = e5; - r_.u16[ 6] = e6; - r_.u16[ 7] = e7; - r_.u16[ 8] = e8; - r_.u16[ 9] = e9; - r_.u16[10] = e10; - r_.u16[11] = e11; - r_.u16[12] = e12; - r_.u16[13] = e13; - r_.u16[14] = e14; - r_.u16[15] = e15; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, - uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), - HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); - #else - r_.u32[ 0] = e0; - r_.u32[ 1] = e1; - r_.u32[ 2] = e2; - r_.u32[ 3] = e3; - r_.u32[ 4] = e4; - r_.u32[ 5] = e5; - r_.u32[ 6] = e6; - r_.u32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { - simde__m256i_private r_; - - r_.u64[0] = e0; - r_.u64[1] = e1; - r_.u64[2] = e2; - r_.u64[3] = e3; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); - r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - r_.f32[4] = e4; - r_.f32[5] = e5; - r_.f32[6] = e6; - r_.f32[7] = e7; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_ps - #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ - 
simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_pd(e3, e2, e1, e0); - #else - simde__m256d_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_set_pd(e1, e0); - r_.m128d[1] = simde_mm_set_pd(e3, e2); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - r_.f64[2] = e2; - r_.f64[3] = e3; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_pd - #define _mm256_set_pd(e3, e2, e1, e0) \ - simde_mm256_set_pd(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); - #else - simde__m256_private r_; - simde__m128_private - e1_ = simde__m128_to_private(e1), - e0_ = simde__m128_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128_private[0] = e0_; - r_.m128_private[1] = e1_; - #elif defined(SIMDE_HAVE_INT128_) - r_.i128[0] = e0_.i128[0]; - r_.i128[1] = e1_.i128[0]; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128 - #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); - #else - simde__m256d_private r_; - simde__m128d_private - e1_ = simde__m128d_to_private(e1), - e0_ = simde__m128d_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d_private[0] = e0_; - r_.m128d_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128d - #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); - #else - simde__m256i_private r_; - simde__m128i_private - e1_ = simde__m128i_to_private(e1), - e0_ = simde__m128i_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i_private[0] = e0_; - r_.m128i_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128i - #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi8(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi8(a); - r_.m128i[1] = simde_mm_set1_epi8(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - #endif - - return 
simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi8 - #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi16 (int16_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi16(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi16(a); - r_.m128i[1] = simde_mm_set1_epi16(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi16 - #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi32 (int32_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi32(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi32(a); - r_.m128i[1] = simde_mm_set1_epi32(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi32 - #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi64x (int64_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi64x(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi64x(a); - r_.m128i[1] = simde_mm_set1_epi64x(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi64x - #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set1_ps (simde_float32 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_ps(a); - #else - simde__m256_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_set1_ps(a); - r_.m128[1] = simde_mm_set1_ps(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_ps - #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_pd(a); - #else - simde__m256d_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_set1_pd(a); - r_.m128d[1] = simde_mm_set1_pd(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_pd - #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i16[i] = a_.i16[2 * i]; - r_.i16[i + quarter_point] = b_.i16[2 * i]; - r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; - r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i16[i] = a_.i16[2 * i + 1]; - r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; - r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; - r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i32[i] = a_.i32[2 * i]; - r_.i32[i + quarter_point] = b_.i32[2 * i]; - r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; - r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - const size_t quarter_point = (sizeof(r_.i32) / 
sizeof(r_.i32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i32[i] = a_.i32[2 * i + 1]; - r_.i32[i + quarter_point] = b_.i32[2 * i + 1]; - r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; - r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f32[i] = a_.f32[2 * i]; - r_.f32[i + quarter_point] = b_.f32[2 * i]; - r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; - r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f32[i] = a_.f32[2 * i + 1]; - r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; - r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; - r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f64[i] = a_.f64[2 * i]; - r_.f64[i + quarter_point] = b_.f64[2 * i]; - r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; - r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f64[i] = a_.f64[2 * i + 1]; - r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; - r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; - r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_abs_ps(simde__m256 a) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_abs_pd(simde__m256d a) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_add_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_add_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_ps - #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hadd_ps(a, b); - #else - return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_ps - #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_add_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_add_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] + b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_pd - #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hadd_pd(a, b); - #else - return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_pd - #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_addsub_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; - r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_addsub_ps - #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_addsub_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; - r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_addsub_pd - #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_and_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_and_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_ps - #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_and_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_and_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; 
i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_pd - #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_andnot_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_ps - #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_andnot_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_pd - #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ - simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_ps - #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
[… deletion of the vendored SIMDE AVX emulation header continues; the original diff line breaks in this stretch were lost in extraction. The removed material is the portable fallbacks for _mm256_blend_pd and _mm256_blendv_{ps,pd}, the broadcast_* and cast* helpers, _mm256_round_{ps,pd} with ceil/floor wrappers, the full _mm_cmp_{pd,ps,sd,ss} and _mm256_cmp_{pd,ps} predicate switches, the copysign helpers, the cvt* conversions, div_{ps,pd}, extractf128_* / insertf128_* / insert_epi* / extract_epi*, dp_ps, the lddqu / load / loadu / loadu_epi* / loadu2 loaders, and the maskload_{pd,ps} variants. The file is deleted wholesale: nothing in the package includes it once the sampler uses Armadillo. …]
mem_addr[i] : SIMDE_FLOAT32_C(0.0); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskload_ps - #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); - #else - _mm_maskstore_pd(mem_addr, mask, a); - #endif - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) - mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) - mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if (mask_.u64[i] >> 63) - mem_addr[i] = a_.f64[i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_pd - #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); - #else - _mm256_maskstore_pd(mem_addr, mask, a); - #endif - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256d_private a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if (mask_.u64[i] & (UINT64_C(1) << 63)) - mem_addr[i] = a_.f64[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_pd - #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); - #else - _mm_maskstore_ps(mem_addr, mask, a); - #endif - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) - mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) - mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) - mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) - mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.f32[i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_ps - #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); - #else - _mm256_maskstore_ps(mem_addr, mask, a); - #endif - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256_private a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.f32[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_ps - #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_min_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_min_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_ps - #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_min_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_min_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_pd - #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_max_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_max_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_ps - #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_max_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_max_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_pd - #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_movedup_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movedup_pd(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movedup_pd - #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_movehdup_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movehdup_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movehdup_ps - #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_moveldup_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_moveldup_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); - #else - SIMDE_VECTORIZE 
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_moveldup_ps - #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_ps(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r |= (a_.u32[i] >> 31) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_ps - #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_pd(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_pd - #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_ps - #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_pd - #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_or_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; 
- #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_ps - #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_or_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] | b_.u64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_pd - #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute_ps (simde__m256 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_ps - #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_pd - #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permute_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permute_ps - #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permute_pd (simde__m128d a, const int imm8) - 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permute_pd - #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_permutevar_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make( - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[b_.i32[i] & 3]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permutevar_ps - #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_permutevar_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make( - (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), - (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permutevar_pd - #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_permutevar_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - simde__m256i_private b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar_ps - #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_permutevar_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - simde__m256i_private b_ = 
simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar_pd - #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); - r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_ps - #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); - r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_pd - #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); - r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_si256 - #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_rcp_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rcp_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); - r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_rcp_ps - #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_rsqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rsqrt_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_rsqrt_ps - #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi8 ( - int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi8( - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi8( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15, - e16, e17, e18, e19, e20, e21, e22, e23, - e24, e25, e26, e27, e28, e29, e30, e31); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi8 - #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi16 ( - int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi16( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi16( - e0, e1, e2, e3, e4, e5, e6, e7, - 
e8, e9, e10, e11, e12, e13, e14, e15); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi16 - #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi32 ( - int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi32 - #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi64x(e3, e2, e1, e0); - #else - return simde_mm256_set_epi64x(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi64x - #define _mm256_setr_epi64x(e3, e2, e1, e0) \ - simde_mm256_setr_epi64x(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_setr_ps ( - simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_ps - #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_pd(e3, e2, e1, e0); - #else - return simde_mm256_set_pd(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_pd - #define _mm256_setr_pd(e3, e2, e1, e0) \ - simde_mm256_setr_pd(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return _mm256_setr_m128(lo, hi); - #else - return simde_mm256_set_m128(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128 - #define _mm256_setr_m128(lo, hi) \ - simde_mm256_setr_m128(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return _mm256_setr_m128d(lo, hi); - #else - return simde_mm256_set_m128d(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128d - #define _mm256_setr_m128d(lo, hi) \ - simde_mm256_setr_m128d(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return 
_mm256_setr_m128i(lo, hi); - #else - return simde_mm256_set_m128i(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128i - #define _mm256_setr_m128i(lo, hi) \ - simde_mm256_setr_m128i(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; - r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; - r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; - r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; - r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_shuffle_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ - simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm256_shuffle_ps(a, b, imm8) \ - SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ - (((imm8) >> 0) & 3) + 0, \ - (((imm8) >> 2) & 3) + 0, \ - (((imm8) >> 4) & 3) + 8, \ - (((imm8) >> 6) & 3) + 8, \ - (((imm8) >> 0) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 12, \ - (((imm8) >> 6) & 3) + 12) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_ps - #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - r_.f64[0] = a_.f64[((imm8 ) & 1) ]; - r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; - r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; - r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_shuffle_pd(a, b, imm8) \ - simde_mm256_set_m128d( \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 2) & 3), \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 0) & 3)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm256_shuffle_pd(a, b, imm8) \ - SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ - (((imm8) >> 0) & 1) + 0, \ - (((imm8) >> 1) & 1) + 4, \ - (((imm8) >> 2) & 1) + 2, \ - (((imm8) >> 3) & 1) + 6) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_pd - #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sqrt_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); - r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sqrt_ps - #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sqrt_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sqrt_pd(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); - r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sqrt_pd - #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_ps(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_ps - #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_pd(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_pd - #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_si256(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_si256 - #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_ps(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_ps - #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_pd(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_pd - #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { - #if 
defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_si256 - #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128(hi_addr, lo_addr, a); - #else - simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); - simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128 - #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128d(hi_addr, lo_addr, a); - #else - simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); - simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128d - #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128i(hi_addr, lo_addr, a); - #else - simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); - simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128i - #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_ps - #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_pd - #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_si256(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_si256 - #define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_ps - #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_ps(a, b); - #else - return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_ps - #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_pd - #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_pd(a, b); - #else - return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_pd - #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_undefined_ps (void) { - simde__m256_private r_; - -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || 
[The remainder of the deleted src/simde/x86/avx.h fallbacks is abridged here: the _mm256_undefined_{ps,pd,si256}, _mm256_xor_{ps,pd}, xorsign and negate helpers, _mm256_unpackhi/_mm256_unpacklo for ps/pd, the _mm256_zextps128_ps256 / _mm256_zextpd128_pd256 / _mm256_zextsi128_si256 widening helpers, and the _mm_/_mm256_ testc, testz and testnzc emulations. Each is written as a native-intrinsic branch guarded by SIMDE_X86_AVX_NATIVE with a portable scalar, vector-extension or WASM fallback, plus an optional _mm* alias under SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES. These are followed by the deleted simde/x86/fma.h fallbacks: fmadd, fmaddsub, fmsub, fmsubadd, fnmadd and fnmsub for ps/pd/ss/sd in 128- and 256-bit variants, using the native FMA intrinsics when SIMDE_X86_FMA_NATIVE is defined and explicit multiply/add loops (or NEON/AltiVec fused ops where available) otherwise.]

diff --git a/src/simde/x86/gfni.h b/src/simde/x86/gfni.h
deleted file mode 100644
index 3821c0464..000000000
--- a/src/simde/x86/gfni.h
+++ /dev/null
@@ -1,55968 +0,0 @@
[The opening of the deleted gfni.h is abridged: it is an auto-generated amalgamated SIMDE header (copyright 2020-2021 Christopher Moore and 2020 Evan Nemerson, MIT licensed) that, inside its SIMDE_X86_GFNI_H guard, inlines simde/x86/avx512/add.h, simde/x86/avx512/types.h, simde/x86/avx.h, simde/x86/sse.h, simde/x86/mmx.h, simde/simde-common.h and simde/hedley.h, each preceded by its own include guard and a repeated copy of the MIT license text (hedley.h carries a CC0-1.0 public-domain notice instead).]
[The rest of the visible gfni.h deletion is the vendored Hedley portability layer: HEDLEY_VERSION and the HEDLEY_STRINGIFY / HEDLEY_CONCAT / HEDLEY_VERSION_ENCODE / HEDLEY_VERSION_DECODE_* helpers, followed by per-compiler HEDLEY_*_VERSION and HEDLEY_*_VERSION_CHECK macros for GNUC, MSVC, Intel, Intel CL, PGI, SunPro, Emscripten, ARM, IBM, the TI compiler family (CL2000, CL430, ARMCL, CL6X, CL7X, CLPRU), Cray, IAR, TinyC, DMC, CompCert, Pelles, MCST LCC and GCC, and then the HEDLEY_HAS_ATTRIBUTE / HEDLEY_HAS_CPP_ATTRIBUTE feature-detection wrappers; the deleted Hedley definitions continue below.]
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_BUILTIN) -# undef HEDLEY_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) -#else -# define HEDLEY_HAS_BUILTIN(builtin) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_BUILTIN) -# undef HEDLEY_GNUC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_BUILTIN) -# undef HEDLEY_GCC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_FEATURE) -# undef HEDLEY_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) -#else -# define HEDLEY_HAS_FEATURE(feature) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_FEATURE) -# undef HEDLEY_GNUC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_FEATURE) -# undef HEDLEY_GCC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_EXTENSION) -# undef HEDLEY_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) -#else -# define HEDLEY_HAS_EXTENSION(extension) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_EXTENSION) -# undef HEDLEY_GNUC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_EXTENSION) -# undef HEDLEY_GCC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) -#else -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_WARNING) -# undef HEDLEY_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) -#else -# define HEDLEY_HAS_WARNING(warning) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_WARNING) -# undef HEDLEY_GNUC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_WARNING) -# undef HEDLEY_GCC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - defined(__clang__) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ - HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ - (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) -# define HEDLEY_PRAGMA(value) _Pragma(#value) -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_PRAGMA(value) __pragma(value) -#else -# define HEDLEY_PRAGMA(value) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_PUSH) -# undef HEDLEY_DIAGNOSTIC_PUSH -#endif -#if defined(HEDLEY_DIAGNOSTIC_POP) -# undef HEDLEY_DIAGNOSTIC_POP -#endif -#if defined(__clang__) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) -# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) -#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#else -# 
define HEDLEY_DIAGNOSTIC_PUSH -# define HEDLEY_DIAGNOSTIC_POP -#endif - -/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wc++98-compat") -# if HEDLEY_HAS_WARNING("-Wc++17-extensions") -# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# endif -#endif -#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x -#endif - -#if defined(HEDLEY_CONST_CAST) -# undef HEDLEY_CONST_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) -#elif \ - HEDLEY_HAS_WARNING("-Wcast-qual") || \ - HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_REINTERPRET_CAST) -# undef HEDLEY_REINTERPRET_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) -#else -# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_STATIC_CAST) -# undef HEDLEY_STATIC_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) -#else -# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_CPP_CAST) -# undef HEDLEY_CPP_CAST -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wold-style-cast") -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ - ((T) (expr)) \ - HEDLEY_DIAGNOSTIC_POP -# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("diag_suppress=Pe137") \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) -# endif -#else -# define HEDLEY_CPP_CAST(T, expr) (expr) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) -# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif -#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-attributes") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") -#elif \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif -#if HEDLEY_HAS_WARNING("-Wcast-qual") -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif -#if HEDLEY_HAS_WARNING("-Wunused-function") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif - -#if defined(HEDLEY_DEPRECATED) -# undef HEDLEY_DEPRECATED -#endif -#if defined(HEDLEY_DEPRECATED_FOR) -# undef HEDLEY_DEPRECATED_FOR -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) -# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) -#elif \ - (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) -#elif defined(__cplusplus) && (__cplusplus >= 201402L) -# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) -# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(deprecated) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") -# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") -#else -# define HEDLEY_DEPRECATED(since) -# define HEDLEY_DEPRECATED_FOR(since, replacement) -#endif - -#if defined(HEDLEY_UNAVAILABLE) -# undef HEDLEY_UNAVAILABLE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warning) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) -#else -# define HEDLEY_UNAVAILABLE(available_since) -#endif - -#if defined(HEDLEY_WARN_UNUSED_RESULT) -# undef HEDLEY_WARN_UNUSED_RESULT -#endif -#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) -# undef HEDLEY_WARN_UNUSED_RESULT_MSG -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) -#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#elif defined(_Check_return_) /* SAL */ -# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ -#else -# define HEDLEY_WARN_UNUSED_RESULT -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) -#endif - -#if defined(HEDLEY_SENTINEL) -# undef HEDLEY_SENTINEL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(sentinel) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) -#else -# define HEDLEY_SENTINEL(position) -#endif - -#if defined(HEDLEY_NO_RETURN) -# undef HEDLEY_NO_RETURN -#endif -#if HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NO_RETURN __noreturn -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -# define HEDLEY_NO_RETURN _Noreturn -#elif defined(__cplusplus) && (__cplusplus >= 201103L) -# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(noreturn) || \ - HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_NO_RETURN _Pragma("does_not_return") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NO_RETURN __attribute((noreturn)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#else -# define HEDLEY_NO_RETURN -#endif - -#if defined(HEDLEY_NO_ESCAPE) -# undef HEDLEY_NO_ESCAPE -#endif -#if HEDLEY_HAS_ATTRIBUTE(noescape) -# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) -#else -# define HEDLEY_NO_ESCAPE -#endif - -#if defined(HEDLEY_UNREACHABLE) -# undef HEDLEY_UNREACHABLE -#endif -#if defined(HEDLEY_UNREACHABLE_RETURN) -# undef HEDLEY_UNREACHABLE_RETURN -#endif -#if defined(HEDLEY_ASSUME) -# undef HEDLEY_ASSUME -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ASSUME(expr) __assume(expr) -#elif HEDLEY_HAS_BUILTIN(__builtin_assume) -# define HEDLEY_ASSUME(expr) __builtin_assume(expr) -#elif \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# if defined(__cplusplus) -# define HEDLEY_ASSUME(expr) std::_nassert(expr) -# else -# define HEDLEY_ASSUME(expr) _nassert(expr) -# endif -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNREACHABLE() __builtin_unreachable() -#elif defined(HEDLEY_ASSUME) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif -#if !defined(HEDLEY_ASSUME) -# if defined(HEDLEY_UNREACHABLE) -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) -# else -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) -# endif -#endif -#if defined(HEDLEY_UNREACHABLE) -# if \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) -# else -# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() -# endif -#else -# define HEDLEY_UNREACHABLE_RETURN(value) return (value) -#endif -#if !defined(HEDLEY_UNREACHABLE) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wpedantic") -# pragma clang diagnostic ignored "-Wpedantic" -#endif -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif -#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) -# if defined(__clang__) -# pragma clang diagnostic ignored "-Wvariadic-macros" -# elif defined(HEDLEY_GCC_VERSION) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif -#endif -#if defined(HEDLEY_NON_NULL) -# undef HEDLEY_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) -#else -# define HEDLEY_NON_NULL(...) 
-#endif -HEDLEY_DIAGNOSTIC_POP - -#if defined(HEDLEY_PRINTF_FORMAT) -# undef HEDLEY_PRINTF_FORMAT -#endif -#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) -#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) -#elif \ - HEDLEY_HAS_ATTRIBUTE(format) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) -#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) -#else -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) -#endif - -#if defined(HEDLEY_CONSTEXPR) -# undef HEDLEY_CONSTEXPR -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) -# endif -#endif -#if !defined(HEDLEY_CONSTEXPR) -# define HEDLEY_CONSTEXPR -#endif - -#if defined(HEDLEY_PREDICT) -# undef HEDLEY_PREDICT -#endif -#if defined(HEDLEY_LIKELY) -# undef HEDLEY_LIKELY -#endif -#if defined(HEDLEY_UNLIKELY) -# undef HEDLEY_UNLIKELY -#endif -#if defined(HEDLEY_UNPREDICTABLE) -# undef HEDLEY_UNPREDICTABLE -#endif -#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) -# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) -# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) -# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) -# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) -# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) -#elif \ - (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, expected, probability) \ - (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) -# define HEDLEY_PREDICT_TRUE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ - })) -# define HEDLEY_PREDICT_FALSE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ - })) -# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) -# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) -#else -# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) -# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) -# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) -# define HEDLEY_LIKELY(expr) (!!(expr)) -# define HEDLEY_UNLIKELY(expr) (!!(expr)) -#endif -#if !defined(HEDLEY_UNPREDICTABLE) -# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) -#endif - -#if defined(HEDLEY_MALLOC) -# undef HEDLEY_MALLOC -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(malloc) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_MALLOC __attribute__((__malloc__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_MALLOC _Pragma("returns_new_memory") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_MALLOC __declspec(restrict) -#else -# define HEDLEY_MALLOC -#endif - -#if defined(HEDLEY_PURE) -# undef HEDLEY_PURE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(pure) || \ - HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PURE __attribute__((__pure__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_PURE _Pragma("does_not_write_global_data") -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ - ) -# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") -#else -# define HEDLEY_PURE -#endif - -#if defined(HEDLEY_CONST) -# undef HEDLEY_CONST -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(const) || \ - HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_CONST __attribute__((__const__)) -#elif \ - HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_CONST _Pragma("no_side_effect") -#else -# define HEDLEY_CONST HEDLEY_PURE -#endif - -#if defined(HEDLEY_RESTRICT) -# undef HEDLEY_RESTRICT -#endif -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) -# define HEDLEY_RESTRICT restrict -#elif \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - defined(__clang__) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RESTRICT __restrict -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) -# define HEDLEY_RESTRICT _Restrict -#else -# define HEDLEY_RESTRICT -#endif - -#if defined(HEDLEY_INLINE) -# undef HEDLEY_INLINE -#endif -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - (defined(__cplusplus) && (__cplusplus >= 199711L)) -# define HEDLEY_INLINE inline -#elif \ - defined(HEDLEY_GCC_VERSION) || \ - HEDLEY_ARM_VERSION_CHECK(6,2,0) -# define HEDLEY_INLINE __inline__ -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_INLINE __inline -#else -# define HEDLEY_INLINE -#endif - -#if defined(HEDLEY_ALWAYS_INLINE) -# undef HEDLEY_ALWAYS_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(always_inline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ALWAYS_INLINE __forceinline -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ - ) -# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") -#else -# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE -#endif - -#if defined(HEDLEY_NEVER_INLINE) -# undef HEDLEY_NEVER_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(noinline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) -# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NEVER_INLINE _Pragma("inline=never") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NEVER_INLINE __attribute((noinline)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#else -# define HEDLEY_NEVER_INLINE -#endif - -#if defined(HEDLEY_PRIVATE) -# undef HEDLEY_PRIVATE -#endif -#if defined(HEDLEY_PUBLIC) -# undef HEDLEY_PUBLIC -#endif -#if defined(HEDLEY_IMPORT) -# undef HEDLEY_IMPORT -#endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC __declspec(dllexport) -# define HEDLEY_IMPORT __declspec(dllimport) -#else -# if \ - HEDLEY_HAS_ATTRIBUTE(visibility) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - ( \ - defined(__TI_EABI__) && \ - ( \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ - ) \ - ) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) -# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) -# else -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC -# endif -# define HEDLEY_IMPORT extern -#endif - -#if defined(HEDLEY_NO_THROW) -# undef HEDLEY_NO_THROW -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nothrow) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_THROW __attribute__((__nothrow__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NO_THROW __declspec(nothrow) -#else -# define HEDLEY_NO_THROW -#endif - -#if defined(HEDLEY_FALL_THROUGH) -# undef HEDLEY_FALL_THROUGH -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_FALL_THROUGH -#elif \ - HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) -#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) -#elif defined(__fallthrough) /* SAL */ -# define HEDLEY_FALL_THROUGH __fallthrough -#else -# define HEDLEY_FALL_THROUGH -#endif - -#if defined(HEDLEY_RETURNS_NON_NULL) -# undef HEDLEY_RETURNS_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) -#elif defined(_Ret_notnull_) /* SAL */ -# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ -#else -# define HEDLEY_RETURNS_NON_NULL -#endif - -#if defined(HEDLEY_ARRAY_PARAM) -# undef HEDLEY_ARRAY_PARAM -#endif -#if \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - !defined(__STDC_NO_VLA__) && \ - !defined(__cplusplus) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_ARRAY_PARAM(name) (name) -#else -# define 
[Deletion of the bundled SIMDe/Hedley headers continues here. The removed hunks in this span cover the simde/hedley.h, simde/simde-detect-clang.h, simde/simde-arch.h, simde/simde-features.h, and simde/simde-diagnostic.h sections of the vendored SIMDe header (compiler, architecture, and SIMD-feature detection plus diagnostic-suppression macros). The verbatim third-party contents of these deleted hunks are omitted.]
*/ - #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T - #define SIMDE_POWER_ALTIVEC_PIXEL __pixel - #define SIMDE_POWER_ALTIVEC_BOOL __bool - - /* Re-define bool if we're using stdbool.h */ - #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #define bool _Bool - #endif -#endif - -#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) - #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_MSA) - #define SIMDE_MIPS_MSA_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_MSA_NATIVE) - #include -#endif - -/* This is used to determine whether or not to fall back on a vector - * function in an earlier ISA extensions, as well as whether - * we expected any attempts at vectorization to be fruitful or if we - * expect to always be running serial code. - * - * Note that, for some architectures (okay, *one* architecture) there - * can be a split where some types are supported for one vector length - * but others only for a shorter length. Therefore, it is possible to - * provide separate values for float/int/double types. */ - -#if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (512) - #elif defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (256) - #elif defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || \ - defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ - defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (128) - #elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) - #endif - - #if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE - #else - #define SIMDE_NATURAL_VECTOR_SIZE (0) - #endif - #endif - - #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif -#endif - -#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) 
((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) - -/* Native aliases */ -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_FMA_NATIVE) - #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VL_NATIVE) - #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) - #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) - #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BW_NATIVE) - #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) - #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) - #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BF16_NATIVE) - #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) - #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) - #define SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512DQ_NATIVE) - #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512CD_NATIVE) - #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512FP16_NATIVE) - #define SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_GFNI_NATIVE) - #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_PCLMUL_NATIVE) - #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) - #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_F16C_NATIVE) - #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AES_NATIVE) - #define 
SIMDE_X86_AES_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SVML_NATIVE) - #define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_SVE_NATIVE) - #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_WASM_SIMD128_NATIVE) - #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES - #endif -#endif - -/* Are floating point values stored using IEEE 754? Knowing - * this at during preprocessing is a bit tricky, mostly because what - * we're curious about is how values are stored and not whether the - * implementation is fully conformant in terms of rounding, NaN - * handling, etc. - * - * For example, if you use -ffast-math or -Ofast on - * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 - * support is not advertised (by defining __STDC_IEC_559__). - * - * However, what we care about is whether it is safe to assume that - * floating point values are stored in IEEE 754 format, in which case - * we can provide faster implementations of some functions. - * - * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- - * so we just assume IEEE 754 for now. There is a test which verifies - * this, if that test fails sowewhere please let us know and we'll add - * an exception for that platform. Meanwhile, you can define - * SIMDE_NO_IEEE754_STORAGE. */ -#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) - #define SIMDE_IEEE754_STORAGE -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_FP16) - #define SIMDE_ARM_NEON_FP16 -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_BF16) - #define SIMDE_ARM_NEON_BF16 -#endif - -#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LASX) - #define SIMDE_LOONGARCH_LASX_NATIVE - #endif -#endif - -#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LSX) - #define SIMDE_LOONGARCH_LSX_NATIVE - #endif -#endif - -#if defined(SIMDE_LOONGARCH_LASX_NATIVE) - #include -#endif -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - #include -#endif - -#endif /* !defined(SIMDE_FEATURES_H) */ -/* :: End simde/simde-features.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-math.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -/* Attempt to find math functions. Functions may be in , - * , compiler built-ins/intrinsics, or platform/architecture - * specific headers. In some cases, especially those not built in to - * libm, we may need to define our own implementations. */ - -#if !defined(SIMDE_MATH_H) -#define SIMDE_MATH_H 1 - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#include -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -/* SLEEF support - * https://sleef.org/ - * - * If you include prior to including SIMDe, SIMDe will use - * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to - * including SIMDe to force the issue. - * - * Note that SLEEF does requires linking to libsleef. - * - * By default, SIMDe will use the 1 ULP functions, but if you use - * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is - * only the case for the simde_math_* functions; for code in other - * SIMDe headers which calls SLEEF directly we may use functions with - * greater error if the API we're implementing is less precise (for - * example, SVML guarantees 4 ULP, so we will generally use the 3.5 - * ULP functions from SLEEF). */ -#if !defined(SIMDE_MATH_SLEEF_DISABLE) - #if defined(__SLEEF_H__) - #define SIMDE_MATH_SLEEF_ENABLE - #endif -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ - #include - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) - #if defined(SLEEF_VERSION_MAJOR) - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #endif -#else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(__has_builtin) - #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) - #define SIMDE_MATH_BUILTIN_LIBM(func) (1) -#else - #define SIMDE_MATH_BUILTIN_LIBM(func) (0) -#endif - -#if defined(HUGE_VAL) - /* Looks like or has already been included. */ - - /* The math.h from libc++ (yes, the C header from the C++ standard - * library) will define an isnan function, but not an isnan macro - * like the C standard requires. So we detect the header guards - * macro libc++ uses. 
*/ - #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) - #define SIMDE_MATH_HAVE_MATH_H - #elif defined(__cplusplus) - #define SIMDE_MATH_HAVE_CMATH - #endif -#elif defined(__has_include) - #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() - #define SIMDE_MATH_HAVE_CMATH - #include - #elif __has_include() - #define SIMDE_MATH_HAVE_MATH_H - #include - #elif !defined(SIMDE_MATH_NO_LIBM) - #define SIMDE_MATH_NO_LIBM - #endif -#elif !defined(SIMDE_MATH_NO_LIBM) - #if defined(__cplusplus) && (__cplusplus >= 201103L) - #define SIMDE_MATH_HAVE_CMATH - HEDLEY_DIAGNOSTIC_PUSH - #if defined(HEDLEY_MSVC_VERSION) - /* VS 14 emits this diagnostic about noexcept being used on a - * function, which we can't do anything about. */ - #pragma warning(disable:4996) - #endif - #include - HEDLEY_DIAGNOSTIC_POP - #else - #define SIMDE_MATH_HAVE_MATH_H - #include - #endif -#endif - -#if !defined(SIMDE_MATH_INFINITY) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_INFINITY (__builtin_inf()) - #elif defined(INFINITY) - #define SIMDE_MATH_INFINITY INFINITY - #endif -#endif - -#if !defined(SIMDE_INFINITYF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inff) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_INFINITYF (__builtin_inff()) - #elif defined(INFINITYF) - #define SIMDE_MATH_INFINITYF INFINITYF - #elif defined(SIMDE_MATH_INFINITY) - #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) - #endif -#endif - -#if !defined(SIMDE_MATH_NAN) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nan) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_NAN (__builtin_nan("")) - #elif defined(NAN) - #define SIMDE_MATH_NAN NAN - #endif -#endif - -#if !defined(SIMDE_NANF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_NANF (__builtin_nanf("")) - #elif defined(NANF) - #define SIMDE_MATH_NANF NANF - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) - #endif -#endif - -#if !defined(SIMDE_MATH_PI) - #if defined(M_PI) - #define SIMDE_MATH_PI M_PI - #else - #define SIMDE_MATH_PI 3.14159265358979323846 - #endif -#endif - -#if !defined(SIMDE_MATH_PIF) - #if defined(M_PI) - #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) - #else - #define SIMDE_MATH_PIF 3.14159265358979323846f - #endif -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180) - #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180F) - #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f -#endif - -#if !defined(SIMDE_MATH_180_OVER_PI) - #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 -#endif - -#if !defined(SIMDE_MATH_180_OVER_PIF) - #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f -#endif - -#if 
!defined(SIMDE_MATH_FLT_MIN) - #if defined(__FLT_MIN__) - #define SIMDE_MATH_FLT_MIN __FLT_MIN__ - #else - #if !defined(FLT_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MIN FLT_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_FLT_MAX) - #if defined(__FLT_MAX__) - #define SIMDE_MATH_FLT_MAX __FLT_MAX__ - #else - #if !defined(FLT_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MAX FLT_MAX - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MIN) - #if defined(__DBL_MIN__) - #define SIMDE_MATH_DBL_MIN __DBL_MIN__ - #else - #if !defined(DBL_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MIN DBL_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MAX) - #if defined(__DBL_MAX__) - #define SIMDE_MATH_DBL_MAX __DBL_MAX__ - #else - #if !defined(DBL_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MAX DBL_MAX - #endif -#endif - -/*** Classification macros from C99 ***/ - -#if !defined(simde_math_isinf) - #if SIMDE_MATH_BUILTIN_LIBM(isinf) - #define simde_math_isinf(v) __builtin_isinf(v) - #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isinf(v) isinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinf(v) std::isinf(v) - #endif -#endif - -#if !defined(simde_math_isinff) - #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define simde_math_isinff(v) __builtin_isinff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinff(v) std::isinf(v) - #elif defined(simde_math_isinf) - #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnan) - #if SIMDE_MATH_BUILTIN_LIBM(isnan) - #define simde_math_isnan(v) __builtin_isnan(v) - #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnan(v) isnan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnan(v) std::isnan(v) - #endif -#endif - -#if !defined(simde_math_isnanf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ - #define simde_math_isnanf(v) __builtin_isnanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnanf(v) std::isnan(v) - #elif defined(simde_math_isnan) - #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnormal) - #if SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormal(v) __builtin_isnormal(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormal(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormal(v) std::isnormal(v) - #endif -#endif - -#if !defined(simde_math_isnormalf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) - #define simde_math_isnormalf(v) __builtin_isnormalf(v) - #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormalf(v) __builtin_isnormal(v) - #elif defined(isnormalf) - #define simde_math_isnormalf(v) isnormalf(v) - #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormalf(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormalf(v) std::isnormal(v) - #elif defined(simde_math_isnormal) - #define simde_math_isnormalf(v) 
simde_math_isnormal(v) - #endif -#endif - -#if !defined(simde_math_issubnormalf) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) - #endif -#endif - -#if !defined(simde_math_issubnormal) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormal(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) - #endif -#endif - -#if defined(FP_NAN) - #define SIMDE_MATH_FP_NAN FP_NAN -#else - #define SIMDE_MATH_FP_NAN 0 -#endif -#if defined(FP_INFINITE) - #define SIMDE_MATH_FP_INFINITE FP_INFINITE -#else - #define SIMDE_MATH_FP_INFINITE 1 -#endif -#if defined(FP_ZERO) - #define SIMDE_MATH_FP_ZERO FP_ZERO -#else - #define SIMDE_MATH_FP_ZERO 2 -#endif -#if defined(FP_SUBNORMAL) - #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL -#else - #define SIMDE_MATH_FP_SUBNORMAL 3 -#endif -#if defined(FP_NORMAL) - #define SIMDE_MATH_FP_NORMAL FP_NORMAL -#else - #define SIMDE_MATH_FP_NORMAL 4 -#endif - -static HEDLEY_INLINE -int -simde_math_fpclassifyf(float v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0f) ? SIMDE_MATH_FP_ZERO : - simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -static HEDLEY_INLINE -int -simde_math_fpclassify(double v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0) ? SIMDE_MATH_FP_ZERO : - simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -#define SIMDE_MATH_FP_QNAN 0x01 -#define SIMDE_MATH_FP_PZERO 0x02 -#define SIMDE_MATH_FP_NZERO 0x04 -#define SIMDE_MATH_FP_PINF 0x08 -#define SIMDE_MATH_FP_NINF 0x10 -#define SIMDE_MATH_FP_DENORMAL 0x20 -#define SIMDE_MATH_FP_NEGATIVE 0x40 -#define SIMDE_MATH_FP_SNAN 0x80 - -static HEDLEY_INLINE -uint8_t -simde_math_fpclassf(float v, const int imm8) { - union { - float f; - uint32_t u; - } fu; - fu.f = v; - uint32_t bits = fu.u; - uint8_t NegNum = (bits >> 31) & 1; - uint32_t const ExpMask = 0x3F800000; // [30:23] - uint32_t const MantMask = 0x007FFFFF; // [22:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 22) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -static HEDLEY_INLINE -uint8_t -simde_math_fpclass(double v, const int imm8) { - union { - double d; - uint64_t u; - } du; - du.d = v; - uint64_t bits = du.u; - uint8_t NegNum = (bits >> 63) & 1; - uint64_t const ExpMask = 0x3FF0000000000000; // [62:52] - uint64_t const MantMask = 0x000FFFFFFFFFFFFF; // [51:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 51) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -/*** Manipulation functions ***/ - -#if !defined(simde_math_nextafter) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafter(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafter(x, y) 
nextafter(x, y) - #endif -#endif - -#if !defined(simde_math_nextafterf) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafterf(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafterf(x, y) nextafterf(x, y) - #endif -#endif - -/*** Functions from C99 ***/ - -#if !defined(simde_math_abs) - #if SIMDE_MATH_BUILTIN_LIBM(abs) - #define simde_math_abs(v) __builtin_abs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_abs(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_abs(v) abs(v) - #endif -#endif - -#if !defined(simde_math_labs) - #if SIMDE_MATH_BUILTIN_LIBM(labs) - #define simde_math_labs(v) __builtin_labs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_labs(v) std::labs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_labs(v) labs(v) - #endif -#endif - -#if !defined(simde_math_llabs) - #if SIMDE_MATH_BUILTIN_LIBM(llabs) - #define simde_math_llabs(v) __builtin_llabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_llabs(v) std::llabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_llabs(v) llabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_acos) - #if SIMDE_MATH_BUILTIN_LIBM(acos) - #define simde_math_acos(v) __builtin_acos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acos(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acos(v) acos(v) - #endif -#endif - -#if !defined(simde_math_acosf) - #if SIMDE_MATH_BUILTIN_LIBM(acosf) - #define simde_math_acosf(v) __builtin_acosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosf(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosf(v) acosf(v) - #endif -#endif - -#if !defined(simde_math_acosh) - #if SIMDE_MATH_BUILTIN_LIBM(acosh) - #define simde_math_acosh(v) __builtin_acosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosh(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosh(v) acosh(v) - #endif -#endif - -#if !defined(simde_math_acoshf) - #if SIMDE_MATH_BUILTIN_LIBM(acoshf) - #define simde_math_acoshf(v) __builtin_acoshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acoshf(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acoshf(v) acoshf(v) - #endif -#endif - -#if !defined(simde_math_asin) - #if SIMDE_MATH_BUILTIN_LIBM(asin) - #define simde_math_asin(v) __builtin_asin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asin(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asin(v) asin(v) - #endif -#endif - -#if !defined(simde_math_asinf) - #if SIMDE_MATH_BUILTIN_LIBM(asinf) - #define simde_math_asinf(v) __builtin_asinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinf(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinf(v) asinf(v) - #endif -#endif - -#if 
!defined(simde_math_asinh) - #if SIMDE_MATH_BUILTIN_LIBM(asinh) - #define simde_math_asinh(v) __builtin_asinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinh(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinh(v) asinh(v) - #endif -#endif - -#if !defined(simde_math_asinhf) - #if SIMDE_MATH_BUILTIN_LIBM(asinhf) - #define simde_math_asinhf(v) __builtin_asinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinhf(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinhf(v) asinhf(v) - #endif -#endif - -#if !defined(simde_math_atan) - #if SIMDE_MATH_BUILTIN_LIBM(atan) - #define simde_math_atan(v) __builtin_atan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan(v) atan(v) - #endif -#endif - -#if !defined(simde_math_atan2) - #if SIMDE_MATH_BUILTIN_LIBM(atan2) - #define simde_math_atan2(y, x) __builtin_atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2(y, x) atan2(y, x) - #endif -#endif - -#if !defined(simde_math_atan2f) - #if SIMDE_MATH_BUILTIN_LIBM(atan2f) - #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2f(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2f(y, x) atan2f(y, x) - #endif -#endif - -#if !defined(simde_math_atanf) - #if SIMDE_MATH_BUILTIN_LIBM(atanf) - #define simde_math_atanf(v) __builtin_atanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanf(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanf(v) atanf(v) - #endif -#endif - -#if !defined(simde_math_atanh) - #if SIMDE_MATH_BUILTIN_LIBM(atanh) - #define simde_math_atanh(v) __builtin_atanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanh(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanh(v) atanh(v) - #endif -#endif - -#if !defined(simde_math_atanhf) - #if SIMDE_MATH_BUILTIN_LIBM(atanhf) - #define simde_math_atanhf(v) __builtin_atanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanhf(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanhf(v) atanhf(v) - #endif -#endif - -#if !defined(simde_math_cbrt) - #if SIMDE_MATH_BUILTIN_LIBM(cbrt) - #define simde_math_cbrt(v) __builtin_cbrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrt(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrt(v) cbrt(v) - #endif -#endif - -#if !defined(simde_math_cbrtf) - #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) - #define simde_math_cbrtf(v) __builtin_cbrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrtf(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrtf(v) cbrtf(v) - #endif -#endif - -#if !defined(simde_math_ceil) - #if SIMDE_MATH_BUILTIN_LIBM(ceil) - #define simde_math_ceil(v) __builtin_ceil(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceil(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_ceil(v) ceil(v) - #endif -#endif - -#if !defined(simde_math_ceilf) - #if SIMDE_MATH_BUILTIN_LIBM(ceilf) - #define simde_math_ceilf(v) __builtin_ceilf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceilf(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) 
- #define simde_math_ceilf(v) ceilf(v) - #endif -#endif - -#if !defined(simde_math_copysign) - #if SIMDE_MATH_BUILTIN_LIBM(copysign) - #define simde_math_copysign(x, y) __builtin_copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysign(x, y) std::copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysign(x, y) copysign(x, y) - #endif -#endif - -#if !defined(simde_math_copysignf) - #if SIMDE_MATH_BUILTIN_LIBM(copysignf) - #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysignf(x, y) std::copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysignf(x, y) copysignf(x, y) - #endif -#endif - -#if !defined(simde_math_signbit) - #if SIMDE_MATH_BUILTIN_LIBM(signbit) - #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - #define simde_math_signbit(x) __builtin_signbit(x) - #else - #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) - #endif - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_signbit(x) std::signbit(x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_signbit(x) signbit(x) - #endif -#endif - -#if !defined(simde_math_cos) - #if SIMDE_MATH_BUILTIN_LIBM(cos) - #define simde_math_cos(v) __builtin_cos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cos(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cos(v) cos(v) - #endif -#endif - -#if !defined(simde_math_cosf) - #if defined(SIMDE_MATH_SLEEF_ENABLE) - #if SIMDE_ACCURACY_PREFERENCE < 1 - #define simde_math_cosf(v) Sleef_cosf_u35(v) - #else - #define simde_math_cosf(v) Sleef_cosf_u10(v) - #endif - #elif SIMDE_MATH_BUILTIN_LIBM(cosf) - #define simde_math_cosf(v) __builtin_cosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosf(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosf(v) cosf(v) - #endif -#endif - -#if !defined(simde_math_cosh) - #if SIMDE_MATH_BUILTIN_LIBM(cosh) - #define simde_math_cosh(v) __builtin_cosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosh(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosh(v) cosh(v) - #endif -#endif - -#if !defined(simde_math_coshf) - #if SIMDE_MATH_BUILTIN_LIBM(coshf) - #define simde_math_coshf(v) __builtin_coshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_coshf(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_coshf(v) coshf(v) - #endif -#endif - -#if !defined(simde_math_erf) - #if SIMDE_MATH_BUILTIN_LIBM(erf) - #define simde_math_erf(v) __builtin_erf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erf(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erf(v) erf(v) - #endif -#endif - -#if !defined(simde_math_erff) - #if SIMDE_MATH_BUILTIN_LIBM(erff) - #define simde_math_erff(v) __builtin_erff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erff(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erff(v) erff(v) - #endif -#endif - -#if !defined(simde_math_erfc) - #if SIMDE_MATH_BUILTIN_LIBM(erfc) - #define simde_math_erfc(v) __builtin_erfc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfc(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfc(v) erfc(v) - #endif -#endif - -#if !defined(simde_math_erfcf) - #if SIMDE_MATH_BUILTIN_LIBM(erfcf) - #define simde_math_erfcf(v) 
__builtin_erfcf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfcf(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfcf(v) erfcf(v) - #endif -#endif - -#if !defined(simde_math_exp) - #if SIMDE_MATH_BUILTIN_LIBM(exp) - #define simde_math_exp(v) __builtin_exp(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp(v) exp(v) - #endif -#endif - -#if !defined(simde_math_expf) - #if SIMDE_MATH_BUILTIN_LIBM(expf) - #define simde_math_expf(v) __builtin_expf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expf(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expf(v) expf(v) - #endif -#endif - -#if !defined(simde_math_expm1) - #if SIMDE_MATH_BUILTIN_LIBM(expm1) - #define simde_math_expm1(v) __builtin_expm1(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1(v) expm1(v) - #endif -#endif - -#if !defined(simde_math_expm1f) - #if SIMDE_MATH_BUILTIN_LIBM(expm1f) - #define simde_math_expm1f(v) __builtin_expm1f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1f(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1f(v) expm1f(v) - #endif -#endif - -#if !defined(simde_math_exp2) - #if SIMDE_MATH_BUILTIN_LIBM(exp2) - #define simde_math_exp2(v) __builtin_exp2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2(v) exp2(v) - #endif -#endif - -#if !defined(simde_math_exp2f) - #if SIMDE_MATH_BUILTIN_LIBM(exp2f) - #define simde_math_exp2f(v) __builtin_exp2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2f(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2f(v) exp2f(v) - #endif -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10(v) __builtin_exp10(v) -#else -# define simde_math_exp10(v) pow(10.0, (v)) -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10f(v) __builtin_exp10f(v) -#else -# define simde_math_exp10f(v) powf(10.0f, (v)) -#endif - -#if !defined(simde_math_fabs) - #if SIMDE_MATH_BUILTIN_LIBM(fabs) - #define simde_math_fabs(v) __builtin_fabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabs(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabs(v) fabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_floor) - #if SIMDE_MATH_BUILTIN_LIBM(floor) - #define simde_math_floor(v) __builtin_floor(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floor(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floor(v) floor(v) - #endif -#endif - -#if !defined(simde_math_floorf) - #if SIMDE_MATH_BUILTIN_LIBM(floorf) - #define simde_math_floorf(v) __builtin_floorf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floorf(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floorf(v) floorf(v) - #endif -#endif - -#if 
!defined(simde_math_fma) - #if SIMDE_MATH_BUILTIN_LIBM(fma) - #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fma(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fma(x, y, z) fma(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmaf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaf) - #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaf(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaf(x, y, z) fmaf(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmax) - #if SIMDE_MATH_BUILTIN_LIBM(fmax) - #define simde_math_fmax(x, y) __builtin_fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmax(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmax(x, y) fmax(x, y) - #endif -#endif - -#if !defined(simde_math_fmaxf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) - #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaxf(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaxf(x, y) fmaxf(x, y) - #endif -#endif - -#if !defined(simde_math_hypot) - #if SIMDE_MATH_BUILTIN_LIBM(hypot) - #define simde_math_hypot(y, x) __builtin_hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypot(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypot(y, x) hypot(y, x) - #endif -#endif - -#if !defined(simde_math_hypotf) - #if SIMDE_MATH_BUILTIN_LIBM(hypotf) - #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypotf(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypotf(y, x) hypotf(y, x) - #endif -#endif - -#if !defined(simde_math_log) - #if SIMDE_MATH_BUILTIN_LIBM(log) - #define simde_math_log(v) __builtin_log(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log(v) log(v) - #endif -#endif - -#if !defined(simde_math_logf) - #if SIMDE_MATH_BUILTIN_LIBM(logf) - #define simde_math_logf(v) __builtin_logf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logf(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logf(v) logf(v) - #endif -#endif - -#if !defined(simde_math_logb) - #if SIMDE_MATH_BUILTIN_LIBM(logb) - #define simde_math_logb(v) __builtin_logb(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logb(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logb(v) logb(v) - #endif -#endif - -#if !defined(simde_math_logbf) - #if SIMDE_MATH_BUILTIN_LIBM(logbf) - #define simde_math_logbf(v) __builtin_logbf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logbf(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logbf(v) logbf(v) - #endif -#endif - -#if !defined(simde_math_log1p) - #if SIMDE_MATH_BUILTIN_LIBM(log1p) - #define simde_math_log1p(v) __builtin_log1p(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log1p(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1p(v) log1p(v) - #endif -#endif - -#if !defined(simde_math_log1pf) - #if SIMDE_MATH_BUILTIN_LIBM(log1pf) - #define simde_math_log1pf(v) __builtin_log1pf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define 
[... snip: remaining deleted lines of the vendored SIMDe headers omitted for brevity. The removal continues through the rest of simde-math.h (libm wrapper macros, cdfnorm/cdfnorminv/erfinv/erfcinv approximations, rad2deg/deg2rad helpers, saturated integer add/sub), all of simde-constify.h (the SIMDE_CONSTIFY_* switch macros), all of simde-align.h (SIMDE_ALIGN_OF/TO/ASSUME/CAST portability macros), and the SIMDE_FAST_* and GCC-style vector-extension configuration that follows ...]
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#else -# define SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_SAFELEN(l) -# define SIMDE_VECTORIZE_REDUCTION(r) -# define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if defined(SIMDE_NO_INLINE) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#elif defined(SIMDE_CONSTRAINED_COMPILATION) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static -#else -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if \ - HEDLEY_HAS_ATTRIBUTE(unused) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ - -#if defined(_MSC_VER) -# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -# define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -# define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -# define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -# define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# elif defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__s390x__) || defined(__zarch__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. 
*/ -# elif defined(_WIN32) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(sun) || defined(__sun) /* Solaris */ -# include -# if defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__APPLE__) -# include -# if defined(__LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -# include -# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -# include -# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# endif -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) - #define simde_bswap64(v) _byteswap_uint64(v) -#else - SIMDE_FUNCTION_ATTRIBUTES - uint64_t - simde_bswap64(uint64_t v) { - return - ((v & (((uint64_t) 0xff) << 56)) >> 56) | - ((v & (((uint64_t) 0xff) << 48)) >> 40) | - ((v & (((uint64_t) 0xff) << 40)) >> 24) | - ((v & (((uint64_t) 0xff) << 32)) >> 8) | - ((v & (((uint64_t) 0xff) << 24)) << 8) | - ((v & (((uint64_t) 0xff) << 16)) << 24) | - ((v & (((uint64_t) 0xff) << 8)) << 40) | - ((v & (((uint64_t) 0xff) )) << 56); - } -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -# error Unknown byte order; please file a bug -#else -# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -# define simde_endian_bswap64_be(value) simde_bswap64(value) -# define simde_endian_bswap64_le(value) (value) -# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -# define simde_endian_bswap64_be(value) (value) -# define simde_endian_bswap64_le(value) simde_bswap64(value) -# endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. 
*/ - -#if !defined(SIMDE_FLOAT32_TYPE) -# define SIMDE_FLOAT32_TYPE float -# define SIMDE_FLOAT32_C(value) value##f -#else -# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -# define SIMDE_FLOAT64_TYPE double -# define SIMDE_FLOAT64_C(value) value -#else -# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if defined(SIMDE_POLY8_TYPE) -# undef SIMDE_POLY8_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY8_TYPE poly8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value)) -#else -# define SIMDE_POLY8_TYPE uint8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value)) -#endif -typedef SIMDE_POLY8_TYPE simde_poly8; - -#if defined(SIMDE_POLY16_TYPE) -# undef SIMDE_POLY16_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY16_TYPE poly16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value)) -#else -# define SIMDE_POLY16_TYPE uint16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value)) -#endif -typedef SIMDE_POLY16_TYPE simde_poly16; - -#if defined(SIMDE_POLY64_TYPE) -# undef SIMDE_POLY64_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_POLY64_TYPE poly64_t -# define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull)) -#else -# define SIMDE_POLY64_TYPE uint64_t -# define SIMDE_POLY64_C(value) value ## ull -#endif -typedef SIMDE_POLY64_TYPE simde_poly64; - -#if defined(SIMDE_POLY128_TYPE) -# undef SIMDE_POLY128_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) -# define SIMDE_POLY128_TYPE poly128_t -# define SIMDE_POLY128_C(value) value -#elif defined(__SIZEOF_INT128__) -# define SIMDE_POLY128_TYPE __int128 -# define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value)) -#else -# define SIMDE_POLY128_TYPE uint64_t -# define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1 -#endif -typedef SIMDE_POLY128_TYPE simde_poly128; - -#if defined(__cplusplus) - typedef bool simde_bool; -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - typedef _Bool simde_bool; -#elif defined(bool) - typedef bool simde_bool; -#else - #include - typedef bool simde_bool; -#endif - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -# define SIMDE_CONVERT_FTOI(T,v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) \ - HEDLEY_DIAGNOSTIC_POP -#else -# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) -#else - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -# define 
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -# define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -# define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -# if \ - defined(HEDLEY_PGI_VERSION) || \ - defined(HEDLEY_MSVC_VERSION) -# define SIMDE_STDC_HOSTED 1 -# else -# define SIMDE_STDC_HOSTED 0 -# endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) - #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) - #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) - #endif -#endif -#if !defined(simde_memset) - #if HEDLEY_HAS_BUILTIN(__builtin_memset) - #define simde_memset(s, c, n) __builtin_memset(s, c, n) - #endif -#endif -#if !defined(simde_memcmp) - #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) - #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) - #endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) - #if !defined(SIMDE_NO_STRING_H) - #if defined(__has_include) - #if !__has_include() - #define SIMDE_NO_STRING_H - #endif - #elif (SIMDE_STDC_HOSTED == 0) - #define SIMDE_NO_STRING_H - #endif - #endif - - #if !defined(SIMDE_NO_STRING_H) - #include - #if !defined(simde_memcpy) - #define simde_memcpy(dest, src, n) memcpy(dest, src, n) - #endif - #if !defined(simde_memset) - #define simde_memset(s, c, n) memset(s, c, n) - #endif - #if !defined(simde_memcmp) - #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) - #endif - #else - /* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. 
*/ - #if !defined(simde_memcpy) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memcpy_(void* dest, const void* src, size_t len) { - char* dest_ = HEDLEY_STATIC_CAST(char*, dest); - char* src_ = HEDLEY_STATIC_CAST(const char*, src); - for (size_t i = 0 ; i < len ; i++) { - dest_[i] = src_[i]; - } - } - #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) - #endif - - #if !defined(simde_memset) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memset_(void* s, int c, size_t len) { - char* s_ = HEDLEY_STATIC_CAST(char*, s); - char c_ = HEDLEY_STATIC_CAST(char, c); - for (size_t i = 0 ; i < len ; i++) { - s_[i] = c_[i]; - } - } - #define simde_memset(s, c, n) simde_memset_(s, c, n) - #endif - - #if !defined(simde_memcmp) - SIMDE_FUCTION_ATTRIBUTES - int - simde_memcmp_(const void *s1, const void *s2, size_t n) { - unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); - unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); - for (size_t i = 0 ; i < len ; i++) { - if (s1_[i] != s2_[i]) { - return (int) (s1_[i] - s2_[i]); - } - } - return 0; - } - #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) - #endif - #endif -#endif - -/*** Functions that quiet a signaling NaN ***/ - -static HEDLEY_INLINE -double -simde_math_quiet(double x) { - uint64_t tmp, mask; - if (!simde_math_isnan(x)) { - return x; - } - simde_memcpy(&tmp, &x, 8); - mask = 0x7ff80000; - mask <<= 32; - tmp |= mask; - simde_memcpy(&x, &tmp, 8); - return x; -} - -static HEDLEY_INLINE -float -simde_math_quietf(float x) { - uint32_t tmp; - if (!simde_math_isnanf(x)) { - return x; - } - simde_memcpy(&tmp, &x, 4); - tmp |= 0x7fc00000lu; - simde_memcpy(&x, &tmp, 4); - return x; -} - -#if defined(FE_ALL_EXCEPT) - #define SIMDE_HAVE_FENV_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_FENV_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_FENV_H -#endif - -#if defined(EXIT_FAILURE) - #define SIMDE_HAVE_STDLIB_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_STDLIB_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_STDLIB_H -#endif - -#if defined(__has_include) -# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -# include -# elif __has_include() -# include -# endif -# if __has_include() -# include -# endif -#elif SIMDE_STDC_HOSTED == 1 -# include -# include -#endif - -#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ - static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ - T_To \ - Name (T_From value) { \ - T_To r; \ - simde_memcpy(&r, &value, sizeof(r)); \ - return r; \ - } - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/check.h :: */ -/* Check (assertions) - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -#if !defined(_WIN32) -# define SIMDE_SIZE_MODIFIER "z" -# define SIMDE_CHAR_MODIFIER "hh" -# define SIMDE_SHORT_MODIFIER "h" -#else -# if defined(_M_X64) || defined(__amd64__) -# define SIMDE_SIZE_MODIFIER "I64" -# else -# define SIMDE_SIZE_MODIFIER "" -# endif -# define SIMDE_CHAR_MODIFIER "" -# define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) -# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ -# define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -# if defined(__has_include) -# if __has_include() -# include -# endif -# elif defined(SIMDE_STDC_HOSTED) -# if SIMDE_STDC_HOSTED == 1 -# include -# endif -# elif defined(__STDC_HOSTED__) -# if __STDC_HOSTETD__ == 1 -# include -# endif -# endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/debug-trap.h :: */ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define simde_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define simde_trap() __debugbreak() -# endif -#endif -#if !defined(simde_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define simde_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define simde_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define simde_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void simde_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define simde_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define simde_trap() raise(SIGTRAP) -# else -# define simde_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -# define simde_dbg_assert(expr) do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -# define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ -/* :: End simde/debug-trap.h :: */ - - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -# if defined(EOF) -# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) -# else -# define simde_errorf(format, ...) (simde_trap()) -# endif - HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -# if defined(SIMDE_CHECK_FAIL_DEFINED) -# define simde_assert(expr) -# else -# if defined(HEDLEY_ASSUME) -# define simde_assert(expr) HEDLEY_ASSUME(expr) -# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) -# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) -# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) -# define simde_assert(expr) __assume(expr) -# else -# define simde_assert(expr) -# endif -# endif -# define simde_assert_true(expr) simde_assert(expr) -# define simde_assert_false(expr) simde_assert(!(expr)) -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) -# define simde_assert_double_equal(a, b, precision) -# define simde_assert_string_equal(a, b) -# define simde_assert_string_not_equal(a, b) -# define simde_assert_memory_equal(size, a, b) -# define simde_assert_memory_not_equal(size, a, b) -#else -# define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ - -(simde_tmp_a_ - simde_tmp_b_) : \ - (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# include -# define simde_assert_string_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ - simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_string_not_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ - simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ - simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) \ - simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) \ - simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) \ - simde_assert_type(float, "f", a, op, b) 
-#define simde_assert_double(a, op, b) \ - simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void*, "p", a, op, b) - -#define simde_assert_int8(a, op, b) \ - simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) \ - simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) \ - simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) \ - simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ -/* :: End simde/check.h :: */ - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether the operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * we use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long lonsg are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long. 
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. 
[... bundled SIMDe sources removed: the tail of simde/simde-common.h (the SIMDE_CAST_VECTOR_SHIFT_COUNT macro and closing guards) and the entire auto-generated MMX emulation header simde/x86/mmx.h — the simde__m64 / simde__m64_private types and the portable wrappers simde_mm_add_* / adds_*, and / andnot, cmpeq_* / cmpgt_*, cvt*, madd / mulhi / mullo, or, packs_*, set* / setr* / setzero, load / store helpers, sll* / slli_*, srl* / srli_*, sra* / srai_*, sub* / subs_*, unpackhi_* / unpacklo_*, xor and their _m_* aliases, each with NEON, Loongson MMI, vector-extension and scalar fallbacks — are deleted wholesale as part of dropping the vendored SIMDe dependency ...]
-#endif /* !defined(SIMDE_X86_MMX_H) */
-/* :: End simde/x86/mmx.h :: */
-/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
-/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */
-/* :: Begin simde/simde-f16.h :: */
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do
so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if !defined(SIMDE_FLOAT16_H) -#define SIMDE_FLOAT16_H - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* Portable version which should work on pretty much any compiler. - * Obviously you can't rely on compiler support for things like - * conversion to/from 32-bit floats, so make sure you always use the - * functions and macros in this file! - * - * The portable implementations are (heavily) based on CC0 code by - * Fabian Giesen: (see also - * ). - * I have basically just modified it to get rid of some UB (lots of - * aliasing, right shifting a negative value), use fixed-width types, - * and work in C. */ -#define SIMDE_FLOAT16_API_PORTABLE 1 -/* _Float16, per C standard (TS 18661-3; - * ). */ -#define SIMDE_FLOAT16_API_FLOAT16 2 -/* clang >= 6.0 supports __fp16 as an interchange format on all - * targets, but only allows you to use them for arguments and return - * values on targets which have defined an ABI. We get around the - * restriction by wrapping the __fp16 in a struct, but we can't do - * that on Arm since it would break compatibility with the NEON F16 - * functions. */ -#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 -/* This is basically __fp16 as specified by Arm, where arugments and - * return values are raw __fp16 values not structs. */ -#define SIMDE_FLOAT16_API_FP16 4 - -/* Choosing an implementation. This is a bit rough, but I don't have - * any ideas on how to improve it. If you do, patches are definitely - * welcome. */ -#if !defined(SIMDE_FLOAT16_API) - #if defined(__ARM_FP16_FORMAT_IEEE) && (defined(SIMDE_ARM_NEON_FP16) || defined(__ARM_FP16_ARGS)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 - #elif !defined(__EMSCRIPTEN__) && !(defined(__clang__) && defined(SIMDE_ARCH_POWER)) && \ - !(defined(HEDLEY_MSVC_VERSION) && defined(__clang__)) && \ - !(defined(SIMDE_ARCH_MIPS) && defined(__clang__)) && \ - !(defined(__clang__) && defined(SIMDE_ARCH_RISCV64)) && ( \ - defined(SIMDE_X86_AVX512FP16_NATIVE) || \ - (defined(SIMDE_ARCH_X86_SSE2) && HEDLEY_GCC_VERSION_CHECK(12,0,0)) || \ - (defined(SIMDE_ARCH_AARCH64) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !defined(__cplusplus)) || \ - ((defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0)) || \ - (!(defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(6,0,0))) - /* We haven't found a better way to detect this. 
It seems like defining - * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then - * checking for defined(FLT16_MAX) should work, but both gcc and - * clang will define the constants even if _Float16 is not - * supported. Ideas welcome. */ - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 - #elif defined(__FLT16_MIN__) && \ - (defined(__clang__) && \ - (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) \ - && !defined(SIMDE_ARCH_RISCV64)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI - #else - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE - #endif -#endif - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 - typedef _Float16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #if !defined(__cplusplus) - #define SIMDE_FLOAT16_C(value) value##f16 - #else - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(_Float16, (value)) - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - typedef struct { __fp16 value; } simde_float16; - #if defined(SIMDE_STATEMENT_EXPR_) && !defined(SIMDE_TESTS_H) - #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) - #else - #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) - #define SIMDE_FLOAT16_IS_SCALAR 1 - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 - typedef __fp16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - typedef struct { uint16_t value; } simde_float16; -#else - #error No 16-bit floating point API. -#endif - -#if \ - defined(SIMDE_VECTOR_OPS) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) - #define SIMDE_FLOAT16_VECTOR -#endif - -/* Reinterpret -- you *generally* shouldn't need these, they're really - * intended for internal use. However, on x86 half-precision floats - * get stuffed into a __m128i/__m256i, so it may be useful. 
*/ - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - #define SIMDE_NANHF simde_uint16_as_float16(0x7E00) // a quiet Not-a-Number - #define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) - #define SIMDE_NINFINITYHF simde_uint16_as_float16(0xFC00) -#else - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF SIMDE_FLOAT16_C(__builtin_nanf16("")) - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_FLOAT16_C(SIMDE_MATH_NAN) - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(__builtin_inf16()) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-__builtin_inf16()) - #else - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-SIMDE_MATH_INFINITY) - #endif - #else - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF __builtin_nanf16("") - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_MATH_NAN - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF __builtin_inf16() - #define SIMDE_NINFINITYHF -(__builtin_inf16()) - #else - #define SIMDE_INFINITYHF HEDLEY_STATIC_CAST(simde_float16, SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF HEDLEY_STATIC_CAST(simde_float16, -SIMDE_MATH_INFINITY) - #endif - #endif -#endif - -/* Conversion -- convert between single-precision and half-precision - * floats. */ -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float16 -simde_float16_from_float32 (simde_float32 value) { - simde_float16 res; - - #if \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) - res = HEDLEY_STATIC_CAST(simde_float16, value); - #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) - res.value = HEDLEY_STATIC_CAST(__fp16, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint32_t f32u = simde_float32_as_uint32(value); - static const uint32_t f32u_infty = UINT32_C(255) << 23; - static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; - static const uint32_t denorm_magic = - ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; - uint16_t f16u; - - uint32_t sign = f32u & (UINT32_C(1) << 31); - f32u ^= sign; - - /* NOTE all the integer compares in this function cast the operands - * to signed values to help compilers vectorize to SSE2, which lacks - * unsigned comparison instructions. This is fine since all - * operands are below 0x80000000 (we clear the sign bit). */ - - if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ - f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ - } else { /* (De)normalized number or zero */ - if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ - /* use a magic value to align our 10 mantissa bits at the bottom of - * the float. as long as FP addition is round-to-nearest-even this - * just works. */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); - - /* and one integer subtract of the bias later, we have our final float! 
*/ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); - } else { - uint32_t mant_odd = (f32u >> 13) & 1; - - /* update exponent, rounding bias part 1 */ - f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); - /* rounding bias part 2 */ - f32u += mant_odd; - /* take the bits! */ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); - } - } - - f16u |= sign >> 16; - res = simde_uint16_as_float16(f16u); - #endif - - return res; -} - -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float32 -simde_float16_to_float32 (simde_float16 value) { - simde_float32 res; - - #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) - res = HEDLEY_STATIC_CAST(simde_float32, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint16_t half = simde_float16_as_uint16(value); - const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); - const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ - uint32_t f32u; - - f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ - uint32_t exp = shifted_exp & f32u; /* just the exponent */ - f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ - - /* handle exponent special cases */ - if (exp == shifted_exp) /* Inf/NaN? */ - f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ - else if (exp == 0) { /* Zero/Denormal? */ - f32u += (1) << 23; /* extra exp adjust */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ - } - - f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ - res = simde_uint32_as_float32(f32u); - #endif - - return res; -} - -#ifdef SIMDE_FLOAT16_C - #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) -#else - #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) -#endif - -#if !defined(simde_isinfhf) && defined(simde_math_isinff) - #define simde_isinfhf(a) simde_math_isinff(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnanhf) && defined(simde_math_isnanf) - #define simde_isnanhf(a) simde_math_isnanf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnormalhf) && defined(simde_math_isnormalf) - #define simde_isnormalhf(a) simde_math_isnormalf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_issubnormalhf) && defined(simde_math_issubnormalf) - #define simde_issubnormalhf(a) simde_math_issubnormalf(simde_float16_to_float32(a)) -#endif - -#define simde_fpclassifyhf(a) simde_math_fpclassifyf(simde_float16_to_float32(a)) - -static HEDLEY_INLINE -uint8_t -simde_fpclasshf(simde_float16 v, const int imm8) { - uint16_t bits = simde_float16_as_uint16(v); - uint8_t negative = (bits >> 15) & 1; - uint16_t const ExpMask = 0x7C00; // [14:10] - uint16_t const MantMask = 0x03FF; // [9:0] - uint8_t exponent_all_ones = ((bits & ExpMask) == ExpMask); - uint8_t exponent_all_zeros = ((bits & ExpMask) == 0); - uint8_t mantissa_all_zeros = ((bits & MantMask) == 0); - uint8_t zero = exponent_all_zeros & mantissa_all_zeros; - uint8_t signaling_bit = (bits >> 9) & 1; - - uint8_t result = 0; - uint8_t snan = exponent_all_ones & (!mantissa_all_zeros) & (!signaling_bit); - uint8_t qnan = exponent_all_ones & (!mantissa_all_zeros) & signaling_bit; - uint8_t positive_zero = (!negative) & zero; - uint8_t negative_zero = negative & zero; - uint8_t positive_infinity = (!negative) & exponent_all_ones & mantissa_all_zeros; - uint8_t negative_infinity = negative & exponent_all_ones & mantissa_all_zeros; - uint8_t 
denormal = exponent_all_zeros & (!mantissa_all_zeros); - uint8_t finite_negative = negative & (!exponent_all_ones) & (!zero); - result = (((imm8 >> 0) & qnan) | \ - ((imm8 >> 1) & positive_zero) | \ - ((imm8 >> 2) & negative_zero) | \ - ((imm8 >> 3) & positive_infinity) | \ - ((imm8 >> 4) & negative_infinity) | \ - ((imm8 >> 5) & denormal) | \ - ((imm8 >> 6) & finite_negative) | \ - ((imm8 >> 7) & snan)); - return result; -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_FLOAT16_H) */ -/* :: End simde/simde-f16.h :: */ - -#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) - #define NOMINMAX - #include -#endif - -#if defined(__ARM_ACLE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_ALIGN_TO_16 __m128 n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v16i8 lsx_i8; - v8i16 lsx_i16; - v4i32 lsx_i32; - v2i64 lsx_i64; - v16u8 lsx_u8; - v8u16 lsx_u16; - v4u32 lsx_u32; - v2u64 lsx_u64; - v4f32 lsx_f32; - v2f64 lsx_f64; - #endif -} simde__m128_private; - -#if defined(SIMDE_X86_SSE_NATIVE) - typedef __m128 simde__m128; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef float32x4_t simde__m128; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; -#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - typedef v4f32 simde__m128; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128_private simde__m128; -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - typedef simde__m128 __m128; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde__m128_from_private(simde__m128_private v) { - simde__m128 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128_private -simde__m128_to_private(simde__m128 v) { - simde__m128_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(float) - simde__m128_to_altivec_f32(simde__m128 value) { - simde__m128_private r_ = simde__m128_to_private(value); - return r_.altivec_f32; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { - simde__m128_private r_; - r_.altivec_f32 = value; - return simde__m128_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); -#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ - -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) -#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ - -enum { - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, - SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, - SIMDE_MM_ROUND_UP = _MM_ROUND_UP, - SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO - #else - SIMDE_MM_ROUND_NEAREST = 0x0000, - SIMDE_MM_ROUND_DOWN = 0x2000, - SIMDE_MM_ROUND_UP = 0x4000, - SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 - #endif -}; -#if defined(_MM_ROUND_MASK) -# define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK -#else -# define SIMDE_MM_ROUND_MASK (0x6000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK -#endif - -#if defined(_MM_FROUND_TO_NEAREST_INT) -# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT -# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF -# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF -# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO -# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION - -# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC -# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC -#else -# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 -# define 
SIMDE_MM_FROUND_TO_NEG_INF 0x01 -# define SIMDE_MM_FROUND_TO_POS_INF 0x02 -# define SIMDE_MM_FROUND_TO_ZERO 0x03 -# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 - -# define SIMDE_MM_FROUND_RAISE_EXC 0x00 -# define SIMDE_MM_FROUND_NO_EXC 0x08 -#endif - -#define SIMDE_MM_FROUND_NINT \ - (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_FLOOR \ - (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_CEIL \ - (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_TRUNC \ - (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_RINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_NEARBYINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) - -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) -# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT -# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF -# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF -# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO -# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION -# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC -# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT -# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR -# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL -# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC -# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT -# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT -#endif - -#if defined(_MM_EXCEPT_INVALID) -# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID -#else -# define SIMDE_MM_EXCEPT_INVALID (0x0001) -#endif -#if defined(_MM_EXCEPT_DENORM) -# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM -#else -# define SIMDE_MM_EXCEPT_DENORM (0x0002) -#endif -#if defined(_MM_EXCEPT_DIV_ZERO) -# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO -#else -# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) -#endif -#if defined(_MM_EXCEPT_OVERFLOW) -# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW -#else -# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) -#endif -#if defined(_MM_EXCEPT_UNDERFLOW) -# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW -#else -# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) -#endif -#if defined(_MM_EXCEPT_INEXACT) -# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT -#else -# define SIMDE_MM_EXCEPT_INEXACT (0x0020) -#endif -#if defined(_MM_EXCEPT_MASK) -# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK -#else -# define SIMDE_MM_EXCEPT_MASK \ - (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ - SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ - SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID - #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM - #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO - #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW - #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW - #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT - #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK -#endif - -#if defined(_MM_MASK_INVALID) -# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID -#else -# define SIMDE_MM_MASK_INVALID (0x0080) -#endif -#if defined(_MM_MASK_DENORM) -# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM -#else -# define SIMDE_MM_MASK_DENORM (0x0100) -#endif -#if defined(_MM_MASK_DIV_ZERO) -# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO -#else -# define 
SIMDE_MM_MASK_DIV_ZERO (0x0200) -#endif -#if defined(_MM_MASK_OVERFLOW) -# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW -#else -# define SIMDE_MM_MASK_OVERFLOW (0x0400) -#endif -#if defined(_MM_MASK_UNDERFLOW) -# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW -#else -# define SIMDE_MM_MASK_UNDERFLOW (0x0800) -#endif -#if defined(_MM_MASK_INEXACT) -# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT -#else -# define SIMDE_MM_MASK_INEXACT (0x1000) -#endif -#if defined(_MM_MASK_MASK) -# define SIMDE_MM_MASK_MASK _MM_MASK_MASK -#else -# define SIMDE_MM_MASK_MASK \ - (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ - SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ - SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID - #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM - #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO - #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW - #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW - #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT - #define _MM_MASK_MASK SIMDE_MM_MASK_MASK -#endif - -#if defined(_MM_FLUSH_ZERO_MASK) -# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK -#else -# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_ON) -# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON -#else -# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_OFF) -# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF -#else -# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK - #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON - #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_ROUNDING_MODE(void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _MM_GET_ROUNDING_MODE(); - #elif defined(SIMDE_HAVE_FENV_H) - unsigned int vfe_mode; - - switch (fegetround()) { - #if defined(FE_TONEAREST) - case FE_TONEAREST: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case FE_TOWARDZERO: - vfe_mode = SIMDE_MM_ROUND_DOWN; - break; - #endif - - #if defined(FE_UPWARD) - case FE_UPWARD: - vfe_mode = SIMDE_MM_ROUND_UP; - break; - #endif - - #if defined(FE_DOWNWARD) - case FE_DOWNWARD: - vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; - break; - #endif - - default: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - } - - return vfe_mode; - #else - return SIMDE_MM_ROUND_NEAREST; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_ROUNDING_MODE(a); - #elif defined(SIMDE_HAVE_FENV_H) - int fe_mode = FE_TONEAREST; - - switch (a) { - #if defined(FE_TONEAREST) - case SIMDE_MM_ROUND_NEAREST: - fe_mode = FE_TONEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case SIMDE_MM_ROUND_TOWARD_ZERO: - fe_mode = FE_TOWARDZERO; - break; - #endif - - #if defined(FE_DOWNWARD) - case SIMDE_MM_ROUND_DOWN: - fe_mode = FE_DOWNWARD; - break; - #endif - - #if defined(FE_UPWARD) - case SIMDE_MM_ROUND_UP: - fe_mode = FE_UPWARD; - break; - #endif - - default: - return; - } - - fesetround(fe_mode); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. 
*/ - #if \ - defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_f32 = vrndiq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndnq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndmq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); - #elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndpq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); - #elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndq_f32(a_.neon_f32); 
- #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); - #elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) -#else - #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps(e3, e2, e1, e0); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; - r_.neon_f32 = vld1q_f32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps1 (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps1(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - (void) a; - return vec_splats(a); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - return (simde__m128)__lsx_vldrepl_w(&a, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_splat(a); - #else - return simde_mm_set_ps(a, a, a, a); - #endif -} -#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps1(a) simde_mm_set_ps1(a) -# define _mm_set1_ps(a) simde_mm_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_move_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_move_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; - r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); - #else - r_.f32[0] = b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_broadcastlow_ps(simde__m128 a) { - /* This function broadcasts the first element in the inpu vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_ss functions since there may be garbage in the upper lanes. */ - - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_shuffle_ps(a, a, 0); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - // the upper values in the result must be the remnants of . 
- r_.neon_f32 = vaddq_f32(a_.neon_f32, value); - #else - r_.f32[0] = a_.f32[0] + b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_and_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_and_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_andnot_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_xor_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_xor_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_or_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_or_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_not_ps(simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* Note: we use ints instead of floats because we don't want cmpeq - * to return false for (NaN, NaN) */ - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - mask_ = simde__m128_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint32_t wa SIMDE_VECTOR(16); - uint32_t wb SIMDE_VECTOR(16); - uint32_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) -# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint16_t wa SIMDE_VECTOR(16); - uint16_t wb SIMDE_VECTOR(16); - uint16_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) -# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_abs_ps(simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - simde_float32 mask_; - uint32_t u32_ = UINT32_C(0x7FFFFFFF); - simde_memcpy(&mask_, &u32_, sizeof(u32_)); - return _mm_and_ps(_mm_set1_ps(mask_), a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vabsq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_abs(a_.altivec_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpge_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpge_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpgt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpgt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vandq_u32(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpunord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpunord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] == b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] >= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] > b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] <= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] < b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] != b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { - simde__m128_private - r_, - dest_ = simde__m128_to_private(dest), - src_ = simde__m128_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); - r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t sign_pos = wasm_f32x4_splat(-0.0f); - r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); - #else - r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); - r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; - r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); - #elif defined(SIMDE_IEEE754_STORAGE) - (void) src_; - (void) dest_; - simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); - r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { - return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_pi2ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); - #else - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_si2ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - r_.i32[1] = a_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvt_ss2si (simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_ss2si(a); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); - #else - simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.i16[i]; - r_.f32[i] = v; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32x2_ps(a, b); - #else - simde__m128_private r_; - simde__m64_private - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); - SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); - #else - r_.f32[0] = (simde_float32) a_.i32[0]; - r_.f32[1] = (simde_float32) a_.i32[1]; - r_.f32[2] = (simde_float32) b_.i32[0]; - r_.f32[3] = (simde_float32) b_.i32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_cvtpi8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); - r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); - r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi16 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi16(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi32(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi8 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi8(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) - /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to - * i16, combine with an all-zero vector of i16 (which will become the upper - * half), narrow to i8. 
*/ - float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); - float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); - float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); - r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) - r_.i8[i] = INT8_MAX; - else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) - r_.i8[i] = INT8_MIN; - else - r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); - } - /* Note: the upper half is undefined */ - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.u16[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtsi32_ss(a, b); - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_ss(a, b); - #else - return _mm_cvtsi64x_ss(a, b); - #endif - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - 
return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm_cvtss_f32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtss_f32(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_f32(a_.neon_f32, 0); - #else - return a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtss_si32 (simde__m128 a) { - return simde_mm_cvt_ss2si(a); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtss_si64(a); - #else - return _mm_cvtss_si64x(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); - #else - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) -# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtt_ss2si (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtt_ss2si(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - simde_float32 v = a_.f32[0]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, v); - #endif - #endif - #endif -} -#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) -# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) - #if defined(__PGI) - return _mm_cvttss_si64x(a); - #else - return _mm_cvttss_si64(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); - float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); - r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) - r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = a_.f32[0] / b_.f32[0]; - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_mm_extract_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private a_ = simde__m64_to_private(a); - return a_.i16[imm8]; -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) -#endif -#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) -# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private - a_ = simde__m64_to_private(a); - - a_.i16[imm8] = i; - - return simde__m64_from_private(a_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) -#endif -#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps(mem_addr); -#else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); - #endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) -#endif - 
[... remaining hunks of the vendored simde/x86/sse.h deletion: the SSE1 emulation layer for the _mm_load*/_mm_store*/_mm_stream* family, maskmove, min/max, movehl/movelh, movemask, mul/mulhi, negate, prefetch hints, rcp/rsqrt approximations, sad, set/setr/setzero/setone/undefined, sfence, shuffle, sqrt, sub, the ucomi* scalar comparisons, unpackhi/unpacklo, and the _MM_TRANSPOSE4_PS macro, each with NEON/WASM/AltiVec/LoongArch-LSX and portable scalar fallbacks ...]
-
-SIMDE_END_DECLS_
-
-HEDLEY_DIAGNOSTIC_POP
-
-#endif /* !defined(SIMDE_X86_SSE_H) */
-/* :: End simde/x86/sse.h :: */
-#if !defined(SIMDE_X86_AVX_H)
-#define SIMDE_X86_AVX_H
-
-/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
-/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */
-/* :: Begin simde/x86/sse4.2.h :: */
-/* SPDX-License-Identifier: MIT
[... standard SIMDe MIT license banner ...]
- * - * Copyright: - * 2017 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_SSE4_2_H) -#define SIMDE_X86_SSE4_2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#if !defined(SIMDE_X86_SSE4_1_H) -#define SIMDE_X86_SSE4_1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/ssse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSSE3_H) -#define SIMDE_X86_SSSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSE3_H) -#define SIMDE_X86_SSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. 
Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - * 2017 Hasindu Gamaarachchi - * 2018 Jeff Daily - */ - -#if !defined(SIMDE_X86_SSE2_H) -#define SIMDE_X86_SSE2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128i n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - #if defined(__ARM_FP16_FORMAT_IEEE) - SIMDE_ALIGN_TO_16 float16x8_t neon_f16; - #endif - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - 
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128i_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128d n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 
msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128d_private; - -#if defined(SIMDE_X86_SSE2_NATIVE) - typedef __m128i simde__m128i; - typedef __m128d simde__m128d; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int64x2_t simde__m128i; -# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - typedef float64x2_t simde__m128d; -# elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -# else - typedef simde__m128d_private simde__m128d; -# endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128i; - typedef v128_t simde__m128d; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; - #else - typedef simde__m128d_private simde__m128d; - #endif -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128i_private simde__m128i; - typedef simde__m128d_private simde__m128d; -#endif - -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - typedef simde__m128i __m128i; - typedef simde__m128d __m128d; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde__m128i_from_private(simde__m128i_private v) { - simde__m128i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i_private -simde__m128i_to_private(simde__m128i v) { - simde__m128i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde__m128d_from_private(simde__m128d_private v) { - simde__m128d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d_private -simde__m128d_to_private(simde__m128d v) { - simde__m128d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, 
f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(double) - simde__m128d_to_altivec_f64(simde__m128d value) { - simde__m128d_private r_ = simde__m128d_to_private(value); - return r_.altivec_f64; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { - simde__m128d_private r_; - r_.altivec_f64 = value; - return simde__m128d_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) - #endif - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_pd(e1, e0); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make(e0, e1); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; - r_.neon_f64 = vld1q_f64(data); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_pd(a); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_pd(a) simde_mm_set1_pd(a) - #define _mm_set_pd1(a) simde_mm_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_abs_pd(simde__m128d a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - simde_float64 mask_; - uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); - simde_memcpy(&mask_, &u64_, sizeof(u64_)); - return _mm_and_pd(_mm_set1_pd(mask_), a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vabsq_f64(a_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_abs(a_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_not_pd(simde__m128d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castpd_si128(a); - return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - mask_ = simde__m128d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 + b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] + b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_add_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] + b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_move_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(HEDLEY_IBM_VERSION) - r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); - #else - r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); - #else - r_.f64[0] = b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_broadcastlow_pd(simde__m128d a) { - /* This function broadcasts the first element in the input vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_sd functions since there may be garbage in the upper lanes. 
*/ - - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[0]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_add_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] + b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] + b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_and_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_and_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_and_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_and_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_andnot_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = ~a_.u64[i] & b_.u64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_andnot_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_xor_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - 
#endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) - uint16_t wa SIMDE_VECTOR(32); - uint16_t wb SIMDE_VECTOR(32); - uint16_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) - uint32_t wa SIMDE_VECTOR(32); - uint32_t wb SIMDE_VECTOR(32); - uint32_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setzero_si128 (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_si128(); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vdupq_n_s32(0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT) - r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = 0; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} 
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_si128() (simde_mm_setzero_si128()) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_bslli_si128 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & ~15))) { - return simde_mm_setzero_si128(); - } - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) - r_.altivec_i8 = - #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - vec_slo - #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ - vec_sro - #endif - (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); - #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.u128[0] = a_.u128[0] << (imm8 * 8); - #else - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i - imm8]; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) - #define simde_mm_bslli_si128(a, imm8) \ - simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ - simde__m128i_from_wasm_v128( \ - wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ - simde__m128i_to_wasm_v128((a)), \ - ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_; \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.i8 = \ - SIMDE_SHUFFLE_VECTOR_(8, 16, \ - simde_tmp_z_.i8, \ - (simde_tmp_a_).i8, \ - HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#endif -#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) - #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_bsrli_si128 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & ~15))) { - return simde_mm_setzero_si128(); - } - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) - r_.altivec_i8 = - #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - vec_sro - #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ - vec_slo - #endif - (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int e = HEDLEY_STATIC_CAST(int, i) + imm8; - r_.i8[i] = (e < 16) ? a_.i8[e] : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) - #define simde_mm_bsrli_si128(a, imm8) \ - simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.wasm_v128 = \ - wasm_i8x16_shuffle( \ - simde_tmp_z_.wasm_v128, \ - simde_tmp_a_.wasm_v128, \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.i8 = \ - SIMDE_SHUFFLE_VECTOR_(8, 16, \ - simde_tmp_z_.i8, \ - (simde_tmp_a_).i8, \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#endif -#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) - #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_clflush (void const* p) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_clflush(p); - #else - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_clflush(p) simde_mm_clflush(p) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] == b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] >= b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] > b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] <= b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] < b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), 
- b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] != b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { - simde__m128d_private - r_, - dest_ = simde__m128d_to_private(dest), - src_ = simde__m128d_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); - #else - simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); - uint64_t u64_nz; - simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); - uint64x2_t sign_pos = vdupq_n_u64(u64_nz); - #endif - r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); - #else - r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); - #endif - #elif defined(simde_math_copysign) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); - } - #else - simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); - return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { - return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_castpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_ps(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f32_f64(a); - #else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castpd_si128 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_si128(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_s64_f64(a); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_castps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_pd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_f32(a); - #else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castps_pd(a) simde_mm_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castps_si128 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_si128(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif 
-} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castps_si128(a) simde_mm_castps_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_castsi128_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_pd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_s64(a); - #else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_castsi128_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_ps(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); - #else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = (a_.i16 == b_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_MIPS_MSA_NATIVE) - r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpgt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpge_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpge_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpngt_pd(a, b); - #else - return simde_mm_cmple_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpngt_sd(a, b); - #else - return simde_mm_cmple_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnge_pd(a, b); - #else - return simde_mm_cmplt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpnge_sd(a, b); - #else - return simde_mm_cmplt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_pd(a, b); - #else - return simde_mm_cmpge_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_sd(a, b); - #else - return simde_mm_cmpge_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnle_pd(a, b); - #else - return simde_mm_cmpgt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return 
_mm_cmpnle_sd(a, b); - #else - return simde_mm_cmpgt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vandq_u64(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm_cvtsd_f64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cvtsd_f64(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); - #else - return a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_pd(a); - #else - simde__m128d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtepi32_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_ps(a); - #else - simde__m128_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #pragma clang diagnostic ignored "-Wc11-extensions" - #endif - r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = simde_math_round(a_.f64[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) - return _mm_cvtpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvtpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) - float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; - r_.f32 = - __builtin_shufflevector( - __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, - 0, 1, 2, 3 - ); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); - r_.f32[2] = SIMDE_FLOAT32_C(0.0); - r_.f32[3] = SIMDE_FLOAT32_C(0.0); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtpi32_pd (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_pd(a); - #else - simde__m128d_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) - a_ = simde__m128_to_private(a); - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - #else - a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_pd(a); - #else - simde__m128d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 v = simde_math_round(a_.f64[0]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsd_si64x(a); - #else - return _mm_cvtsd_si64(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); - #endif -} -#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) - #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); - - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i]; - } - #endif - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_x_mm_cvtsi128_si16 (simde__m128i a) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s16(a_.neon_i16, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i16, 0); - #else - return a_.i16[0]; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi128_si32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi128_si32(a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s32(a_.neon_i32, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i32, 0); - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsi128_si64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsi128_si64x(a); - #else - return _mm_cvtsi128_si64(a); - #endif - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); - #endif - return a_.i64[0]; - #endif -} -#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) - #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_sd(a, b); - #else - simde__m128d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.i64[1] = a_.i64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cvtsi16_si128 (int16_t a) { - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); - #else - r_.i16[0] = a; - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - r_.i16[4] = 0; - r_.i16[5] = 0; - r_.i16[6] = 0; - r_.i16[7] = 0; - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi32_si128 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_si128(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_sd(a, b); - #else - return _mm_cvtsi64x_sd(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) - #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi64_si128 (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_si128(a); - #else - return _mm_cvtsi64x_si128(a); - #endif - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i64x2_make(a, 0); - #else - r_.i64[0] = a; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) - #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtss_sd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); - return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); - - return simde__m128d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvttpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvttpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = a_.f64[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvttpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - /* Values below INT32_MIN saturate anyways, so we don't need to - * test for that. 
*/ - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = - vandq_u32( - vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), - vceqq_f32(a_.neon_f32, a_.neon_f32) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); - #endif - - r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - v128_t valid_input = - wasm_v128_and( - wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), - wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); - #endif - - r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); - #endif - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; - - __typeof__(r_.i32) valid_input = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.i32), - (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) - ); - #elif !defined(SIMDE_FAST_NANS) - __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); - #endif - - __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; - r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); - #endif - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvttsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvttsd_si64(a); - #else - return _mm_cvttsd_si64x(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); - #endif -} -#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) - #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] / b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - uint16_t r; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); - #else - r = a_.u16[imm8 & 7]; - #endif - - return HEDLEY_STATIC_CAST(int32_t, r); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) - #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m128i_private a_ = simde__m128i_to_private(a); - a_.i16[imm8 & 7] = i; - return simde__m128i_from_private(a_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load1_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load1_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); - #else - return simde_mm_set1_pd(*mem_addr); - #endif -} -#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) - #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_sd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_sd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = UINT64_C(0); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_load_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadh_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); - #else - simde_float64 t; - - simde_memcpy(&t, mem_addr, sizeof(t)); - r_.f64[0] = a_.f64[0]; - r_.f64[1] = t; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_epi64(mem_addr); - #else - simde__m128i_private r_; - - int64_t value; - simde_memcpy(&value, mem_addr, sizeof(value)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); - #else - r_.i64[0] = value; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vld1_f64( - HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = a_.u64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadr_pd(mem_addr); - #else - simde__m128d_private - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); - r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t tmp = 
wasm_v128_load(mem_addr); - r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); - #else - r_.f64[0] = mem_addr[1]; - r_.f64[1] = mem_addr[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld1q_f64(mem_addr); - #else - simde__m128d_private r_; - - simde_memcpy(&r_, mem_addr, sizeof(r_)); - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi8 - #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi16 - #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi32(void const * mem_addr) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi32 - #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi64 - #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si128 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); - #else - simde__m128i_private r_; - - #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_PACKED_ - struct simde_mm_loadu_si128_s { - __typeof__(r_) v; - } __attribute__((__packed__, __may_alias__)); - r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_madd_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpaddq_s32(pl, ph); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); - int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); - int32x2_t rh = 
vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); - r_.neon_i32 = vcombine_s32(rl, rh); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) a32, b32, p32; - SIMDE_CONVERT_VECTOR_(a32, a_.i16); - SIMDE_CONVERT_VECTOR_(b32, b_.i16); - p32 = a32 * b32; - r_.i32 = - __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + - __builtin_shufflevector(p32, p32, 1, 3, 5, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - if (mask_.u8[i] & 0x80) { - mem_addr[i] = a_.i8[i]; - } - } - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) - /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ - return _mm_movemask_epi8(a); - #else - int32_t r = 0; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ - static const uint8_t md[16] = { - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - }; - - /* Extend sign bit over entire lane */ - uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); - /* Clear all but the bit we're interested in. 
*/ - uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); - /* Alternate bytes from low half and high half */ - uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); - uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vaddvq_u16(x); - #else - uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[15 - i] >> 7) << (15 - i); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_movemask_pd(a); - #else - int32_t r = 0; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + - (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 
-simde_mm_movepi64_pi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movepi64_pi64(a); - #else - simde__m64_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i64 = vget_low_s64(a_.neon_i64); - #else - r_.i64[0] = a_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_movpi64_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movpi64_epi64(a); - #else - simde__m128i_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_move_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_move_epi64(a) simde_mm_move_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t a_lo = vmovn_u64(a_.neon_u64); - uint32x2_t b_lo = vmovn_u64(b_.neon_u64); - r_.neon_u64 = vmull_u32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( - wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), - wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(a_.u32) z = { 0, }; - a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); - b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * - HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 * b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] * b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i64 = a_.i64 % b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_pd(a, b); - #else - simde__m128d_private - r_, 
- a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] * b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_mul_su32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); - #else - r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mulhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a3210 = vget_low_s16(a_.neon_i16); - int16x4_t b3210 = vget_low_s16(b_.neon_i16); - int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); - #else - int16x4_t a7654 = vget_high_s16(a_.neon_i16); - int16x4_t b7654 = vget_high_s16(b_.neon_i16); - int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); - r_.neon_u16 = rv.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); - 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_mulhi_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t a3210 = vget_low_u16(a_.neon_u16); - uint16x4_t b3210 = vget_low_u16(b_.neon_u16); - uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); - r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - #else - uint16x4_t a7654 = vget_high_u16(a_.neon_u16); - uint16x4_t b7654 = vget_high_u16(b_.neon_u16); - uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - r_.neon_u16 = neon_r.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); - r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mullo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_or_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - 
r_.i32f = a_.i32f | b_.i32f; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_or_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi16(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; - const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; - - int16_t m SIMDE_VECTOR(32); - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); - v = (v & ~m) | (min & m); - - m = v > max; - v = (v & ~m) | (max & m); - - SIMDE_CONVERT_VECTOR_(r_.i8, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; - r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi32(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; - const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; - - int32_t m SIMDE_VECTOR(32); - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); - v = (v & ~m) | (min & m); - - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); - v = (v & ~m) | (max & m); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packus_epi16(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); - #else - r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = - vcombine_u8( - vqmovun_s16(a_.neon_i16), - vqmovun_s16(b_.neon_i16) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - - v &= ~(v >> 15); - v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); - - SIMDE_CONVERT_VECTOR_(r_.i8, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; - r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_pause (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_pause(); - #elif defined(SIMDE_ARCH_X86) - __asm__ __volatile__("pause"); - #elif defined(SIMDE_ARCH_ARM_NEON) - #if defined(_MSC_VER) - __isb(_ARM64_BARRIER_SY); - #else - __asm__ __volatile__("isb\n"); - #endif - #elif defined(SIMDE_ARCH_POWER) - __asm__ __volatile__ ("or 27,27,27" ::: "memory"); - #elif defined(SIMDE_ARCH_WASM) - __asm__ __volatile__ ("nop"); - #elif defined(HEDLEY_GCC_VERSION) - #if defined(SIMDE_ARCH_RISCV) - __builtin_riscv_pause(); - #else - __asm__ __volatile__ ("nop" ::: "memory"); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_pause() (simde_mm_pause()) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sad_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); - r_.neon_u64 = vcombine_u64( - vpaddl_u32(vpaddl_u16(vget_low_u16(t))), - vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - uint16_t tmp = 0; - SIMDE_VECTORIZE_REDUCTION(+:tmp) - for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { - const size_t e = j + (i * 8); - tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); - } - r_.i64[i] = tmp; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_make( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { - e0, e1, e2, e3, - e4, e5, e6, e7, - e8, e9, e10, e11, - e12, e13, e14, e15}; - r_.neon_i8 = vld1q_s8(data); - #else - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i16 = vld1q_s16(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - r_.i16[4] = e4; - r_.i16[5] = e5; - r_.i16[6] = e6; - r_.i16[7] = e7; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si16 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ - HEDLEY_GCC_VERSION_CHECK(12,1,0)) - return _mm_loadu_si16(mem_addr); - #else - int16_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_x_mm_cvtsi16_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi32(e3, e2, e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; - r_.neon_i32 = vld1q_s32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); - #else - r_.i32[0] = e0; - r_.i32[1] = e1; - r_.i32[2] = e2; - r_.i32[3] = e3; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si32 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ - HEDLEY_GCC_VERSION_CHECK(12,1,0)) - return _mm_loadu_si32(mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m128i_private r_; - r_.neon_i32 = vsetq_lane_s32(* HEDLEY_REINTERPRET_CAST(const int32_t *, mem_addr), vdupq_n_s32(0), 0); - return simde__m128i_from_private(r_); - #else - int32_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_mm_cvtsi32_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_epi64(e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); - #else - r_.m64[0] = e0; - r_.m64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi64x (int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set_epi64x(e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; - r_.neon_i64 = vld1q_s64(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make(e0, e1); - #else - r_.i64[0] = e0; - r_.i64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si64 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - return _mm_loadu_si64(mem_addr); - #else - int64_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_mm_cvtsi64_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, - uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, - uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi8( - HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), - HEDLEY_STATIC_CAST(char, e11), 
HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), - HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), - HEDLEY_STATIC_CAST(char, e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { - e0, e1, e2, e3, - e4, e5, e6, e7, - e8, e9, e10, e11, - e12, e13, e14, e15}; - r_.neon_u8 = vld1q_u8(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); - #else - r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; - r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; - r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; - r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, - uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi16( - HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), - HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u16 = vld1q_u16(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); - #else - r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; - r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi32( - HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; - r_.neon_u32 = vld1q_u32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); - #else - r_.u32[0] = e0; - r_.u32[1] = e1; - r_.u32[2] = e2; - r_.u32[3] = e3; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; - r_.neon_u64 = vld1q_u64(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_make(e0, e1); - #else - r_.u64[0] = e0; - r_.u64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_sd (simde_float64 a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_sd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); - #else - return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_sd(a) simde_mm_set_sd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi8(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vdupq_n_s8(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi16 (int16_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi16(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vdupq_n_s16(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi32 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi32(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vdupq_n_s32(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi64x (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set1_epi64x(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vdupq_n_s64(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_epi64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - return simde_mm_set1_epi64x(a_.i64[0]); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu8 (uint8_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); - #else - return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu16 (uint16_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); - #else - return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu32 (uint32_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); - #else - return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu64 (uint64_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); - #else - return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_epi8( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - 
#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi32(e3, e2, e1, e0); - #else - return simde_mm_set_epi32(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_epi64(e1, e0); - #else - return simde_mm_set_epi64(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_pd(e1, e0); - #else - return simde_mm_set_pd(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setzero_pd (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_pd(); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); - #else - return simde_mm_castsi128_pd(simde_mm_setzero_si128()); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_pd() simde_mm_setzero_pd() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_undefined_pd (void) { - simde__m128d_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_pd(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_pd() simde_mm_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_undefined_si128 (void) { - simde__m128i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_si128(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_si128() (simde_mm_undefined_si128()) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_setone_pd (void) { - return simde_mm_castps_pd(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_setone_si128 (void) { - return simde_mm_castps_si128(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = 
simde__m128i_to_private(a); \ - simde__m128i_from_wasm_v128( \ - wasm_i32x4_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3)); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_epi32(a, imm8) \ - (__extension__ ({ \ - const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ - int32x4_t simde_mm_shuffle_epi32_r_; \ - simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ - vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - (simde_tmp_a_).i32, \ - (simde_tmp_a_).i32, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; - r_.f64[1] = ((imm8 & 2) == 0) ? 
b_.f64[0] : b_.f64[1]; - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ - simde__m128d_from_private((simde__m128d_private) { .f64 = \ - SIMDE_SHUFFLE_VECTOR_(64, 16, \ - simde__m128d_to_private(a).f64, \ - simde__m128d_to_private(b).f64, \ - (((imm8) ) & 1), \ - (((imm8) >> 1) & 1) + 2) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[i]; - } - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflehi_epi16(a, imm8) \ - (__extension__ ({ \ - int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ - simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ - wasm_i16x8_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = 
simde__m128i_to_private(a); - - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; - } - SIMDE_VECTORIZE - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) \ - simde__m128i_from_wasm_v128( \ - wasm_i16x8_shuffle( \ - simde__m128i_to_wasm_v128((a)), \ - wasm_i16x8_splat(0), \ - (((imm8) & 0x03) ), \ - (((imm8) & 0x0c) >> 2), \ - (((imm8) & 0x30) >> 4), \ - (((imm8) & 0xc0) >> 6), \ - 4, 5, 6, 7)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflelo_epi16(a, imm8) \ - (__extension__({ \ - int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ - simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), \ - 4, 5, 6, 7) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 15) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = (a_.u16 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? 
wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 31) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = (a_.u32 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 63) - return simde_mm_setzero_si128(); - - const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] << s; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsqrtq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_sqrt(a_.altivec_f64); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_sqrt) - r_.f64[0] = simde_math_sqrt(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~15) ? 15 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~31) ? 
31 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sra_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) - return _mm_sra_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - (((imm8) <= 0) ? 
\ - (a) : \ - simde__m128i_from_neon_i16( \ - ((imm8) > 15) ? \ - vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ - vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i32( \ - ((imm8) > 31) ? \ - vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ - vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sl(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 63))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i64( \ - ((imm8) > 63) ? \ - vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ - vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u16( \ - ((imm8) > 15) ? \ - vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ - vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u32( \ - ((imm8) > 31) ? \ - vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ - vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sr(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); - #else - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u64( \ - ((imm8) > 63) ? \ - vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ - vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store1_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); - #else - mem_addr[0] = a_.f64[0]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) - #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_sd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); - simde_memcpy(mem_addr, &v, sizeof(v)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); - simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde_float64 v = a_.f64[0]; - simde_memcpy(mem_addr, &v, sizeof(simde_float64)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void - simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeh_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - *mem_addr = a_.f64[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int64_t tmp; - - /* memcpy to prevent aliasing, tmp because we can't take the - * address of a vector element. */ - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - tmp = vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - tmp = vec_extract(a_.altivec_i64, 0); - #else - tmp = a_.i64[0]; - #endif - - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_pd(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 tmp; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - tmp = vgetq_lane_f64(a_.neon_f64, 0); - #else - tmp = a_.f64[0]; - #endif - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storer_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #else - mem_addr[0] = a_.f64[1]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { - #if 
[Bulk deletion of the vendored SIMDE headers continues here; the verbatim removed code is elided. The span covers the tail of the inlined simde/x86/sse2.h section (simde_mm_storeu_si16/si32/si64/si128, simde_mm_stream_*, sub/subs, the ucomi* scalar comparisons, lfence/mfence, unpackhi/unpacklo, xor/not, SIMDE_MM_SHUFFLE2), the entire simde/x86/sse3.h section (deinterleave even/odd helpers, addsub, hadd/hsub, lddqu, loaddup, movedup, movehdup, moveldup), and the start of the simde/x86/ssse3.h section (abs, alignr, shuffle_epi8/pi8, hadd/hadds/hsub). Each function is a portable re-implementation of the corresponding x86 intrinsic with per-platform branches (native SSE2/SSE3/SSSE3, ARM NEON, WASM SIMD128, POWER AltiVec, and scalar fallback loops); all of it is removed unchanged along with the rest of the bundled SIMDE code.]
a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[0] - a_.i32[1]; - r_.i32[1] = b_.i32[0] - b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsubs_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); - #else - return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); - #else - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); - r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_maddubs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Zero extend a */ - int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); - int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); - - /* Sign extend by shifting left then shifting right. */ - int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); - int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); - - /* multiply */ - int16x8_t prod1 = vmulq_s16(a_even, b_even); - int16x8_t prod2 = vmulq_s16(a_odd, b_odd); - - /* saturated add */ - r_.neon_i16 = vqaddq_s16(prod1, prod2); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_maddubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); - int16x8_t bi = vmovl_s8(b_.neon_i8); - int16x8_t p = vmulq_s16(ai, bi); - int16x4_t l = vget_low_s16(p); - int16x4_t h = vget_high_s16(p); - r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_mulhrs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), - vget_low_s16(b_.neon_i16)); - int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), - vget_high_s16(b_.neon_i16)); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); - int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); - - /* Join together */ - r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); - v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); - const v128_t __inc = wasm_i32x4_splat(0x4000); - __lo = wasm_i32x4_add(__lo, __inc); - __hi = wasm_i32x4_add(__hi, __inc); - __lo = wasm_i32x4_add(__lo, __lo); - __hi = wasm_i32x4_add(__hi, __hi); - r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhrs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - 
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow = vrshrn_n_s32(mul, 15); - - /* Join together */ - r_.neon_i16 = narrow; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); - uint8x16_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s8(b_.neon_i8); - #else - bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); - #endif - bnz_mask = vmvnq_u8(bnz_mask); - - r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); - simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); - uint16x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s16(b_.neon_i16); - #else - bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); - #endif - bnz_mask = vmvnq_u16(bnz_mask); - - r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); - simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); - uint32x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s32(b_.neon_i32); - #else - bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); - #endif - bnz_mask = vmvnq_u32(bnz_mask); - - r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); - simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); - uint8x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s8(b_.neon_i8); - #else - bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); - #endif - bnz_mask = vmvn_u8(bnz_mask); - - r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); - uint16x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s16(b_.neon_i16); - #else - bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); - #endif - bnz_mask = vmvn_u16(bnz_mask); - - r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); - uint32x2_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s32(b_.neon_i32); - #else - bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); - #endif - bnz_mask = vmvn_u32(bnz_mask); - - r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/ssse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) -# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_epi16(a, b, imm8) \ - (__extension__ ({ \ - simde__m128i_private \ - simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ - simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ - simde_mm_blend_epi16_r_; \ - \ - simde_mm_blend_epi16_r_.i16 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 16, 16, \ - simde_mm_blend_epi16_a_.i16, \ - simde_mm_blend_epi16_b_.i16, \ - ((imm8) & (1 << 0)) ? 8 : 0, \ - ((imm8) & (1 << 1)) ? 9 : 1, \ - ((imm8) & (1 << 2)) ? 10 : 2, \ - ((imm8) & (1 << 3)) ? 11 : 3, \ - ((imm8) & (1 << 4)) ? 12 : 4, \ - ((imm8) & (1 << 5)) ? 13 : 5, \ - ((imm8) & (1 << 6)) ? 14 : 6, \ - ((imm8) & (1 << 7)) ? 15 : 7 \ - ); \ - \ - simde__m128i_from_private(simde_mm_blend_epi16_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_epi16 - #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; - } - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_pd(a, b, imm8) \ - (__extension__ ({ \ - simde__m128d_private \ - simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ - simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ - simde_mm_blend_pd_r_; \ - \ - simde_mm_blend_pd_r_.f64 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 64, 16, \ - simde_mm_blend_pd_a_.f64, \ - simde_mm_blend_pd_b_.f64, \ - ((imm8) & (1 << 0)) ? 2 : 0, \ - ((imm8) & (1 << 1)) ? 3 : 1 \ - ); \ - \ - simde__m128d_from_private(simde_mm_blend_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_pd - #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_ps(a, b, imm8) \ - (__extension__ ({ \ - simde__m128_private \ - simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ - simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ - simde_mm_blend_ps_r_; \ - \ - simde_mm_blend_ps_r_.f32 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 32, 16, \ - simde_mm_blend_ps_a_.f32, \ - simde_mm_blend_ps_b_.f32, \ - ((imm8) & (1 << 0)) ? 4 : 0, \ - ((imm8) & (1 << 1)) ? 5 : 1, \ - ((imm8) & (1 << 2)) ? 6 : 2, \ - ((imm8) & (1 << 3)) ? 
7 : 3 \ - ); \ - \ - simde__m128_from_private(simde_mm_blend_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_ps - #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_epi8(a, b, mask); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); - return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Use a signed shift right to create a mask with the sign bit */ - mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); - r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); - #else - mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; - #endif - - r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int8_t m = mask_.i8[i] >> 7; - r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_epi8 - #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE2_NATIVE) - mask = simde_mm_srai_epi16(mask, 15); - return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); - r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i16 = mask_.i16 < z; - #else - mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; - #endif - - r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int16_t m = mask_.i16[i] >> 15; - r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; - mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); - #else - mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; - #endif - - r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - int32_t m = mask_.i32[i] >> 31; - r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i64) z = { 0, 0 }; - mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); - #else - mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; - #endif - - r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - int64_t m = mask_.i64[i] >> 63; - r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_pd - #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_ps - #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_pd (simde__m128d a, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - /* For architectures which lack a current direction SIMD instruction. */ - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndiq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyint) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_nearbyint(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndaq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_roundeven) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_roundeven(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndmq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_floor(a_.f64[i]); - } - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndpq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); - #elif defined(simde_math_ceil) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_ceil(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_trunc(a_.f64[i]); - } - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_pd - #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_pd - #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ps - #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_sd - #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_ss(a, 
b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ss - #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cmpeq_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ - uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); - uint32x4_t swapped = vrev64q_u32(cmp); - r_.neon_u32 = vandq_u32(cmp, swapped); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpeq_epi64 - #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_i16 = s16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, 0, -1, 1, -1, 2, -1, 3, - -1, 4, -1, 5, -1, 6, -1, 7)); - r_.i16 >>= 8; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi16 - #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) 
- __m128i tmp = _mm_unpacklo_epi8(a, a); - tmp = _mm_unpacklo_epi16(tmp, tmp); - return _mm_srai_epi32(tmp, 24); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ - r_.neon_i32 = s32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, 0, -1, -1, -1, 1, - -1, -1, -1, 2, -1, -1, -1, 3)); - r_.i32 >>= 24; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi32 - #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); - r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - /* Disabled on x86 due to lack of 64-bit arithmetic shift until - * until AVX-512 (at which point we would be using the native - * _mm_cvtepi_epi64 anyways). 
*/ - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, -1, -1, -1, -1, 0, - -1, -1, -1, -1, -1, -1, -1, 1)); - r_.i64 >>= 56; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi64 - #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_u16 = u16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 16, 1, 17, 2, 18, 3, 19, - 4, 20, 5, 21, 6, 22, 7, 23)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi16 - #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi32(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ - r_.neon_u32 = u32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 
18, 19, 1, 21, 22, 23, - 2, 25, 26, 27, 3, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi32 - #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi64(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 18, 19, 20, 21, 22, 23, - 1, 25, 26, 27, 28, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi64 - #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); - r_.i32 >>= 16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi32 - #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 1, 11, 2, 13, 3, 15)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi32 - #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 10, 11, - 1, 13, 14, 15)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi64 - #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, - 8, 9, 10, 0, - 12, 13, 14, 1)); - r_.i64 >>= 48; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi64 - #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i tmp = _mm_shuffle_epi32(a, 0x50); - tmp = _mm_srai_epi32(tmp, 31); - tmp = _mm_shuffle_epi32(tmp, 0xed); - return _mm_unpacklo_epi32(a, tmp); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); - r_.i64 >>= 32; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi32_epi64 - #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); - #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u32) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu32_epi64 - #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - - switch (imm8) { - case 0xff: - r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); - break; - case 0x13: - r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); - break; - default: - { /* imm8 is a compile-time constant, so this all becomes just a load */ - uint64_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - - r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); - - { - uint64_t mask_data[] = { - (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - break; - } - #else - simde_float64 sum = SIMDE_FLOAT64_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; - } - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_pd - #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - - switch (imm8) { - case 0xff: - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - case 0x7f: - r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - default: - { - { - uint32_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - - { - uint32_t mask_data[] = { - (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - } - break; - } - #else - simde_float32 sum = SIMDE_FLOAT32_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); - } - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(HEDLEY_MCST_LCC_VERSION) - #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ - SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ - _mm_dp_ps((a), (b), (imm8)); \ - SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ - })) - #else - #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_ps - #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) -#endif - -#if defined(simde_mm_extract_epi8) -# undef simde_mm_extract_epi8 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_mm_extract_epi8 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i8, imm8); - #else - return a_.i8[imm8 & 15]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) -# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi8 - #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) -#endif - -#if defined(simde_mm_extract_epi32) -# undef simde_mm_extract_epi32 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i32, imm8); - #else - return a_.i32[imm8 & 3]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi32 - #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) -#endif - -#if defined(simde_mm_extract_epi64) -# undef simde_mm_extract_epi64 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_extract_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i64, imm8); - #else - return a_.i64[imm8 & 1]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) -#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_extract_epi64 - #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) -#endif - -#if defined(simde_mm_extract_ps) -# undef simde_mm_extract_ps -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128_private - a_ = simde__m128_to_private(a); - - return a_.i32[imm8 & 3]; -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_ps - #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_pd - #define _mm_floor_pd(a) simde_mm_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ps - #define _mm_floor_ps(a) simde_mm_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_floor) - r_.f64[0] = simde_math_floor(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_sd - #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_floor_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_floorf) - r_.f32[0] = simde_math_floorf(b_.f32[0]); - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ss - #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - /* clang-3.8 returns an incompatible type, so we need the cast. MSVC - * can't handle the cast ("error C2440: 'type cast': cannot convert - * from '__m128i' to '__m128i'"). */ - #if defined(__clang__) - #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) - #else - #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi8 - #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(__clang__) - #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) - #else - #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi32 - #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - #if defined(SIMDE_BUG_GCC_94482) - simde__m128i_private - a_ = simde__m128i_to_private(a); - - switch(imm8) { - case 0: - return simde_mm_set_epi64x(a_.i64[1], i); - break; - case 1: - return simde_mm_set_epi64x(i, a_.i64[0]); - break; - default: - HEDLEY_UNREACHABLE(); - break; - } - #else - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i64[imm8] = i; - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_insert_epi64 - #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - float tmp1_ = b_.f32[(imm8 >> 6) & 3]; - a_.f32[(imm8 >> 4) & 3] = tmp1_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_ps - #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi8(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi8 - #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi32(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi32 - #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_add_epi16(b, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu16 - #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu32 - #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi8 - #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi32 - #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu16 - #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu32 - #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_minpos_epu16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_minpos_epu16(a); - #else - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()), - a_ = simde__m128i_to_private(a); - - r_.u16[0] = UINT16_MAX; - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - if (a_.u16[i] < r_.u16[0]) { - r_.u16[0] = a_.u16[i]; - r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); - } - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_minpos_epu16 - #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - const int a_offset = imm8 & 4; - const int b_offset = (imm8 & 3) << 2; - -#if defined(simde_math_abs) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { - r_.u16[i] = - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) -# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mpsadbw_epu8 - #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mul_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // vmull_s32 upcasts instead of masking, so we downcast. 
- int32x2_t a_lo = vmovn_s64(a_.neon_i64); - int32x2_t b_lo = vmovn_s64(b_.neon_i64); - r_.neon_i64 = vmull_s32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make( - wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), - wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = - HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * - HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mul_epi32 - #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mullo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mullo_epi32 - #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 * b_.u32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] * b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_packus_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i max = _mm_set1_epi32(UINT16_MAX); - const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); - const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); - return - _mm_packs_epi32( - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) - ); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); - #else - r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = - 
vcombine_u16( - vqmovun_s32(a_.neon_i32), - vqmovun_s32(b_.neon_i32) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - - v &= ~(v >> 31); - v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_packus_epi32 - #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyint) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f64[0] = simde_math_nearbyint(b_.f64[0]); - break; - #endif - - #if defined(simde_math_floor) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f64[0] = simde_math_floor(b_.f64[0]); - break; - #endif - - #if defined(simde_math_ceil) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f64[0] = simde_math_ceil(b_.f64[0]); - break; - #endif - - #if defined(simde_math_trunc) - case SIMDE_MM_FROUND_TO_ZERO: - r_.f64[0] = simde_math_trunc(b_.f64[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) -# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_sd - #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128_private - r_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyintf) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_floorf) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f32[0] = simde_math_floorf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_ceilf) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f32[0] = simde_math_ceilf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_truncf) - case SIMDE_MM_FROUND_TO_ZERO: - 
r_.f32[0] = simde_math_truncf(b_.f32[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_ss - #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_load) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - return __builtin_nontemporal_load(mem_addr); - #else - return simde_mm_load_si128(mem_addr); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_stream_load_si128 - #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_ones (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_ones(a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; - #else - int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(&:r_) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r_ &= a_.i32f[i]; - } - - r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_ones - #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_zeros(a, mask); - #else - simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; - #else - int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(|:r_) - for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { - r_ |= tmp_.i32f[i]; - } - - r = !r_; - #endif - - return r; - #endif -} 
-#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_zeros - #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_mix_ones_zeros(a, mask); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); - int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); - return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); - long long c0 = wasm_i64x2_extract_lane(m, 0); - long long c1 = wasm_i64x2_extract_lane(m, 1); - long long ones = c0 | c1; - long long zeros = ~(c0 & c1); - return ones && zeros; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) - if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) - return 1; - - return 0; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_mix_ones_zeros - #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #else - int_fast32_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= ~a_.i32f[i] & b_.i32f[i]; - } - - return HEDLEY_STATIC_CAST(int, !r); - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_si128 - #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testnzc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); - int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !( !(vgetq_lane_s64(s641, 0) || vgetq_lane_s64(s641, 1)) \ - || !(vgetq_lane_s64(s640, 0) || vgetq_lane_s64(s640, 1)) ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ - && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) - return 1; - } - - return 0; - #endif - #endif -} -#if 
defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_si128 - #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testz_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #elif defined(SIMDE_HAVE_INT128_) - if ((a_.u128[0] & b_.u128[0]) == 0) { - return 1; - } - return 0; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if ((a_.u64[i] & b_.u64[i]) > 0) - return 0; - } - #endif - - return 1; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_si128 - #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_1_H) */ -/* :: End simde/x86/sse4.1.h :: */ - -#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS - #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS - #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS - #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS - #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY - #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES - #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH - #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED - #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY - #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY - #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT - #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT - #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK - #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK -#else - #define SIMDE_SIDD_UBYTE_OPS 0x00 - #define SIMDE_SIDD_UWORD_OPS 0x01 - #define SIMDE_SIDD_SBYTE_OPS 0x02 - #define SIMDE_SIDD_SWORD_OPS 0x03 - #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 - #define SIMDE_SIDD_CMP_RANGES 0x04 - #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 - #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c - #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 - #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 - #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 - #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 - #define SIMDE_SIDD_BIT_MASK 0x00 - #define SIMDE_SIDD_UNIT_MASK 0x40 -#endif - -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) - #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS - #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS - #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS - #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS - #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY - #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES - #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH - #define 
_SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED - #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY - #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY - #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY - #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY - #define _SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT - #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT - #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK - #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ - _mm_cmpestrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrs - #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 
16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ - _mm_cmpestrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrz - #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_cmpgt_epi64(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://stackoverflow.com/a/65175746/501126 */ - __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); - r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); - return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://stackoverflow.com/a/65223269/501126 */ - r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpgt_epi64 - #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_8_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 8) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i8[i]) - a_invalid = 1; - } - return a_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_16_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 16) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i16[i]) - a_invalid = 1; - } - return a_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrs(a, b, imm8) \ - _mm_cmpistrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrs(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? 
simde_mm_cmpistrs_16_((a)) \ - : simde_mm_cmpistrs_8_((a))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrs - #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_8_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 8) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i8[i]) - b_invalid = 1; - } - return b_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_16_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 16) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i16[i]) - b_invalid = 1; - } - return b_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrz(a, b, imm8) \ - _mm_cmpistrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrz(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? simde_mm_cmpistrz_16_((b)) \ - : simde_mm_cmpistrz_8_((b))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrz - #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u8(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cb(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc ^= v; - for(int bit = 0 ; bit < 8 ; bit++) { - if (crc & 1) - crc = (crc >> 1) ^ UINT32_C(0x82f63b78); - else - crc = (crc >> 1); - } - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u16(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32ch(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u8(crc, v & 0xff); - crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u32(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cw(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u16(crc, v & 0xffff); - crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) - return _mm_crc32_u64(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return 
[... collapsed for readability: this hunk deletes the bundled third-party SIMDE polyfill headers verbatim. The removed span starts with the tail of simde/x86/sse4.2.h (the non-native simde_mm_crc32_u64 fallback and its _mm_crc32_u64 native-alias define, ending at the "/* :: End simde/x86/sse4.2.h :: */" marker), followed by the auto-generated SIMDE AVX section ("AUTOMATICALLY GENERATED FILE, DO NOT MODIFY", 589c7d599ae2213823acc4334a3ae8ef8caefe18).
The deleted AVX portion comprises the simde__m256 / simde__m256d / simde__m256i private unions, typedefs, native-alias machinery, and alignment static asserts; the SIMDE_CMP_* and _CMP_* comparison-predicate constants; and the portable implementations of the 256-bit intrinsics: the castps/castpd/castsi256 conversions, setzero/setone, set_epi8/16/32/64x and the unsigned x_set_epu* helpers, set_ps/set_pd, set_m128/m128d/m128i, the set1_* family, the x_deinterleaveeven/odd helpers for epi16/epi32/ps/pd, x_abs_ps/pd, add/hadd/addsub for ps and pd, and/andnot, blend/blendv, the broadcast family, castpd128_pd256 and related widening/narrowing casts, round/ceil, and the simde_mm_cmp_pd / simde_mm_cmp_ps predicate dispatch switches (the cmp_ps switch continues past the end of this excerpt).
Every line in this span is a deletion of unaltered vendored SIMDE code; no package-local source appears in it. ...]
break; - case SIMDE_CMP_LE_OS: - case SIMDE_CMP_LE_OQ: - return simde_mm_cmple_ps(a, b); - break; - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - return simde_mm_cmpneq_ps(a, b); - break; - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); - break; - case SIMDE_CMP_NLT_US: - case SIMDE_CMP_NLT_UQ: - return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); - break; - case SIMDE_CMP_GE_OS: - case SIMDE_CMP_GE_OQ: - return simde_mm_cmpge_ps(a, b); - break; - case SIMDE_CMP_NLE_US: - case SIMDE_CMP_NLE_UQ: - return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); - break; - case SIMDE_CMP_GT_OS: - case SIMDE_CMP_GT_OQ: - return simde_mm_cmpgt_ps(a, b); - break; - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - return simde_mm_setzero_ps(); - break; - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - return simde_x_mm_setone_ps(); - break; - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - return simde_mm_cmpunord_ps(a, b); - break; - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - return simde_mm_cmpord_ps(a, b); - break; - } - - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); -} -/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false - * comparisons, but only when AVX-512 is enabled. */ -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ - simde__m128 simde_mm_cmp_ps_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ - break; \ - default: \ - simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ - break; \ - } \ - simde_mm_cmp_ps_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_ps - #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - a_.i64[0] = INT64_C(0); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - a_.i64[0] = ~INT64_C(0); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m128d_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_sd - #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - a_.i32[0] = INT32_C(0); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - a_.i32[0] = ~INT32_C(0); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m128_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_ss - #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m256d -#if defined(__clang__) && defined(__AVX512DQ__) -simde_mm256_cmp_pd_internal_ -#else -simde_mm256_cmp_pd -#endif -(simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m256d_from_private(r_); -} -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ - simde__m256d simde_mm256_cmp_pd_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ - break; \ - default: \ - simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ - break; \ - } \ - simde_mm256_cmp_pd_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_pd - #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m256 -#if defined(__clang__) && defined(__AVX512DQ__) -simde_mm256_cmp_ps_internal_ -#else -simde_mm256_cmp_ps -#endif -(simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m256_to_private(simde_mm256_setzero_ps()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m256_from_private(r_); -} -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ - simde__m256 simde_mm256_cmp_ps_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ - break; \ - default: \ - simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ - break; \ - } \ - simde_mm256_cmp_ps_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256_private \ - simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ - simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ - simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ - \ - for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ - simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ - } \ - \ - simde__m256_from_private(simde_mm256_cmp_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_ps - #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { - simde__m256_private - r_, - dest_ = simde__m256_to_private(dest), - src_ = simde__m256_to_private(src); - - #if defined(simde_math_copysignf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #else - simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); - return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { - simde__m256d_private - r_, - dest_ = simde__m256d_to_private(dest), - src_ = simde__m256d_to_private(src); - - #if defined(simde_math_copysign) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); - } - #else - simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); - return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); - #endif - - return simde__m256d_from_private(r_); -} - -HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m256d -simde_mm256_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtepi32_pd(a); - #else - simde__m256d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi32_pd - #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 - simde_mm256_cvtepi32_ps (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtepi32_ps(a); - #else - simde__m256_private r_; - simde__m256i_private a_ = simde__m256i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi32_ps - #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_cvtpd_epi32 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtpd_epi32(a); - #else - simde__m128i_private r_; - simde__m256d_private a_ = simde__m256d_to_private(a); - - #if defined(simde_math_nearbyint) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtpd_epi32 - #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_cvtpd_ps (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m256d_private a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtpd_ps - #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtps_epi32 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtps_epi32(a); - #else - simde__m256i_private r_; - simde__m256_private a_ = simde__m256_to_private(a); - - #if defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtps_epi32 - #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtps_pd(a); - #else - simde__m256d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); - } - - return 
simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtps_pd - #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm256_cvtsd_f64 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MSVC_VERSION_CHECK(19,14,0)) - return _mm256_cvtsd_f64(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - return a_.f64[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtsd_f64 - #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm256_cvtsi256_si32 (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MSVC_VERSION_CHECK(19,14,0)) - return _mm256_cvtsi256_si32(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i32[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtsi256_si32 - #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm256_cvtss_f32 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MSVC_VERSION_CHECK(19,14,0)) - return _mm256_cvtss_f32(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - return a_.f32[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtss_f32 - #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_cvttpd_epi32 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvttpd_epi32(a); - #else - simde__m128i_private r_; - simde__m256d_private a_ = simde__m256d_to_private(a); - - #if defined(simde_math_trunc) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvttpd_epi32 - #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvttps_epi32 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvttps_epi32(a); - #else - simde__m256i_private r_; - simde__m256_private a_ = simde__m256_to_private(a); - - #if defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvttps_epi32 - #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_div_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_ps - #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_div_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_pd - #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm256_extractf128_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256d_private a_ = simde__m256d_to_private(a); - return a_.m128d[imm8]; -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf128_pd - #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_extractf128_ps (simde__m256 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256_private a_ = simde__m256_to_private(a); - return a_.m128[imm8]; -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf128_ps - #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[imm8]; -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf128_si256 - #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_floor_pd (simde__m256d a) { - return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_floor_pd - #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_floor_ps (simde__m256 a) { - return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_floor_ps - #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 31) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i8[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi8 - #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 15) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i16[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi16 - #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 7) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i32[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi32 - #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 3) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i64[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm256_insert_epi64 - #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256d_private a_ = simde__m256d_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - a_.m128d_private[imm8] = b_; - - return simde__m256d_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_pd - #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256_private a_ = simde__m256_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - 
a_.m128_private[imm8] = b_; - - return simde__m256_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_ps - #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - a_.m128i_private[imm8] = b_; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_si256 - #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) -#else -# define simde_mm256_dp_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ - simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_dp_ps - #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm256_extract_epi32 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 7) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i32[index]; -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi32 - #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm256_extract_epi64 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 3) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i64[index]; -} -#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) - #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) - #endif -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm256_extract_epi64 - #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_lddqu_si256 - #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_pd(mem_addr); - #else - simde__m256d r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); - 
return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_pd - #define _mm256_load_pd(a) simde_mm256_load_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_ps(mem_addr); - #else - simde__m256 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_ps - #define _mm256_load_ps(a) simde_mm256_load_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_load_si256 (simde__m256i const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_si256(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_si256 - #define _mm256_load_si256(a) simde_mm256_load_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_pd(a); - #else - simde__m256d r; - simde_memcpy(&r, a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_pd - #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_ps(a); - #else - simde__m256 r; - simde_memcpy(&r, a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_ps - #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi8 - #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) -#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi16 - #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi32(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi32 - #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi64 - #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_si256 (void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_si256 - #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - return _mm256_loadu2_m128(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), - simde_mm_loadu_ps(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128 - #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
- return _mm256_loadu2_m128d(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), - simde_mm_loadu_pd(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128d - #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - return _mm256_loadu2_m128i(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), - simde_mm_loadu_si128(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128i - #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); - #else - return _mm_maskload_pd(mem_addr, mask); - #endif - #else - simde__m128d_private r_; - simde__m128i_private - mask_ = simde__m128i_to_private(mask), - mask_shr_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde_mm_and_pd(simde_mm_load_pd(mem_addr), - simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { - mask_shr_.i64[i] = mask_.i64[i] >> 63; - } - #endif - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskload_pd - #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); - #else - return _mm256_maskload_pd(mem_addr, mask); - #endif - #else - simde__m256d_private r_; - simde__m256i_private mask_ = simde__m256i_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskload_pd - #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); - #else - return _mm_maskload_ps(mem_addr, mask); - #endif - #else - simde__m128_private r_; - simde__m128i_private - mask_ = simde__m128i_to_private(mask), - mask_shr_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde_mm_and_ps(simde_mm_load_ps(mem_addr), - simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { - mask_shr_.i32[i] = mask_.i32[i] >> 31; - } - #endif - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskload_ps - #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); - #else - return _mm256_maskload_ps(mem_addr, mask); - #endif - #else - simde__m256_private r_; - simde__m256i_private mask_ = simde__m256i_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskload_ps - #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); - #else - _mm_maskstore_pd(mem_addr, mask, a); - #endif - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) - mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) - mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if (mask_.u64[i] >> 63) - mem_addr[i] = a_.f64[i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_pd - #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); - #else - _mm256_maskstore_pd(mem_addr, mask, a); - #endif - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256d_private a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - if (mask_.u64[i] & (UINT64_C(1) << 63)) - mem_addr[i] = a_.f64[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_pd - #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); - #else - _mm_maskstore_ps(mem_addr, mask, a); - #endif - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) - mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) - mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) - mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) - mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.f32[i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_ps - #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); - #else - _mm256_maskstore_ps(mem_addr, mask, a); - #endif - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256_private a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.f32[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_ps - #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_min_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_min_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_ps - #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_min_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_min_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_pd - #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_max_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_max_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_ps - #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_max_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_max_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_pd - #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_movedup_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movedup_pd(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movedup_pd - #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_movehdup_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movehdup_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movehdup_ps - #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_moveldup_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_moveldup_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); - #else - SIMDE_VECTORIZE 
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_moveldup_ps - #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_ps(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r |= (a_.u32[i] >> 31) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_ps - #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_pd(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_pd - #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_ps - #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_pd - #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_or_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; 
- #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_ps - #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_or_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] | b_.u64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_pd - #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute_ps (simde__m256 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_ps - #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_pd - #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permute_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permute_ps - #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permute_pd (simde__m128d a, const int imm8) - 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permute_pd - #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_permutevar_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make( - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[b_.i32[i] & 3]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permutevar_ps - #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_permutevar_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make( - (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), - (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permutevar_pd - #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_permutevar_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - simde__m256i_private b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar_ps - #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_permutevar_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - simde__m256i_private b_ = 
simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar_pd - #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); - r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_ps - #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); - r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_pd - #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); - r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_si256 - #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_rcp_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rcp_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); - r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_rcp_ps - #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_rsqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rsqrt_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_rsqrt_ps - #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi8 ( - int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi8( - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi8( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15, - e16, e17, e18, e19, e20, e21, e22, e23, - e24, e25, e26, e27, e28, e29, e30, e31); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi8 - #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi16 ( - int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi16( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi16( - e0, e1, e2, e3, e4, e5, e6, e7, - 
e8, e9, e10, e11, e12, e13, e14, e15); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi16 - #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi32 ( - int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi32 - #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi64x(e3, e2, e1, e0); - #else - return simde_mm256_set_epi64x(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi64x - #define _mm256_setr_epi64x(e3, e2, e1, e0) \ - simde_mm256_setr_epi64x(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_setr_ps ( - simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_ps - #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_pd(e3, e2, e1, e0); - #else - return simde_mm256_set_pd(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_pd - #define _mm256_setr_pd(e3, e2, e1, e0) \ - simde_mm256_setr_pd(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return _mm256_setr_m128(lo, hi); - #else - return simde_mm256_set_m128(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128 - #define _mm256_setr_m128(lo, hi) \ - simde_mm256_setr_m128(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return _mm256_setr_m128d(lo, hi); - #else - return simde_mm256_set_m128d(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128d - #define _mm256_setr_m128d(lo, hi) \ - simde_mm256_setr_m128d(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return 
_mm256_setr_m128i(lo, hi); - #else - return simde_mm256_set_m128i(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128i - #define _mm256_setr_m128i(lo, hi) \ - simde_mm256_setr_m128i(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; - r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; - r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; - r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; - r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_shuffle_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ - simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm256_shuffle_ps(a, b, imm8) \ - SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ - (((imm8) >> 0) & 3) + 0, \ - (((imm8) >> 2) & 3) + 0, \ - (((imm8) >> 4) & 3) + 8, \ - (((imm8) >> 6) & 3) + 8, \ - (((imm8) >> 0) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 12, \ - (((imm8) >> 6) & 3) + 12) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_ps - #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - r_.f64[0] = a_.f64[((imm8 ) & 1) ]; - r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; - r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; - r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_shuffle_pd(a, b, imm8) \ - simde_mm256_set_m128d( \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 2) & 3), \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 0) & 3)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm256_shuffle_pd(a, b, imm8) \ - SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ - (((imm8) >> 0) & 1) + 0, \ - (((imm8) >> 1) & 1) + 4, \ - (((imm8) >> 2) & 1) + 2, \ - (((imm8) >> 3) & 1) + 6) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_pd - #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sqrt_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); - r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sqrt_ps - #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sqrt_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sqrt_pd(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); - r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sqrt_pd - #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_ps(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_ps - #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_pd(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_pd - #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_si256(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_si256 - #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_ps(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_ps - #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_pd(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_pd - #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { - #if 
defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_si256 - #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128(hi_addr, lo_addr, a); - #else - simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); - simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128 - #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128d(hi_addr, lo_addr, a); - #else - simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); - simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128d - #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128i(hi_addr, lo_addr, a); - #else - simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); - simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128i - #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_ps - #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_pd - #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_si256(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_si256 - #define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_ps - #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_ps(a, b); - #else - return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_ps - #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_pd - #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_pd(a, b); - #else - return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_pd - #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_undefined_ps (void) { - simde__m256_private r_; - -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || 
HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_ps(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256_to_private(simde_mm256_setzero_ps()); -#endif - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_ps - #define _mm256_undefined_ps() simde_mm256_undefined_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_undefined_pd (void) { - simde__m256d_private r_; - -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_pd(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); -#endif - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_pd - #define _mm256_undefined_pd() simde_mm256_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_undefined_si256 (void) { - simde__m256i_private r_; -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_si256(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_si256 - #define _mm256_undefined_si256() simde_mm256_undefined_si256() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_xor_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_ps - #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_xor_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] ^ b_.u64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_pd - #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_xorsign_ps(simde__m256 dest, 
[Deleted vendored SIMDe code omitted for brevity: the remainder of simde/x86/avx.h (the xorsign/negate helpers, _mm256_unpackhi/_mm256_unpacklo for ps/pd, the _mm256_zext*128 widening casts, and the _mm/_mm256 testc/testz/testnzc predicates with their scalar and WASM fallbacks), all of simde/x86/avx512/types.h (the simde__m128bh/__m256bh/__m512bh/__m512/__m512d/__m512h/__m512i private unions, the simde__mmask8–64 typedefs and native-alias shims, the SIMDE_MM_CMPINT_* constants, and the size/alignment static asserts), and the opening of simde/x86/avx2.h (_mm256_abs/add/hadd/adds/avg and the blend_epi16/epi32 implementations). These are auto-generated, amalgamated SIMDe headers removed wholesale by this change.]
b_.i32[i] : a_.i32[i]; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_blend_epi32(a, b, imm8) _mm256_blend_epi32(a, b, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_epi32(a, b, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8) >> 4), \ - simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8) & 0x0F)) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_epi32 - #define _mm256_blend_epi32(a, b, imm8) simde_mm256_blend_epi32(a, b, imm8) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_blendv_epi8(simde__m256i a, simde__m256i b, simde__m256i mask) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_blendv_epi8(a, b, mask); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - mask_ = simde__m256i_to_private(mask); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_blendv_epi8(a_.m128i[0], b_.m128i[0], mask_.m128i[0]); - r_.m128i[1] = simde_mm_blendv_epi8(a_.m128i[1], b_.m128i[1], mask_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - __typeof__(mask_.i8) tmp = mask_.i8 >> 7; - r_.i8 = (tmp & b_.i8) | (~tmp & a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - int8_t tmp = mask_.i8[i] >> 7; - r_.i8[i] = (tmp & b_.i8[i]) | (~tmp & a_.i8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_blendv_epi8(a, b, imm8) _mm256_blendv_epi8(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_blendv_epi8 - #define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastb_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastb_epi8(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastb_epi8 - #define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastb_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastb_epi8(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastb_epi8 - #define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastw_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastw_epi16(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - 
#undef _mm_broadcastw_epi16 - #define _mm_broadcastw_epi16(a) simde_mm_broadcastw_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastw_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastw_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastw_epi16 - #define _mm256_broadcastw_epi16(a) simde_mm256_broadcastw_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastd_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastd_epi32(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastd_epi32 - #define _mm_broadcastd_epi32(a) simde_mm_broadcastd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastd_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastd_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastd_epi32 - #define _mm256_broadcastd_epi32(a) simde_mm256_broadcastd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastq_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastq_epi64(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastq_epi64 - #define _mm_broadcastq_epi64(a) simde_mm_broadcastq_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastq_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastq_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastq_epi64 - #define _mm256_broadcastq_epi64(a) simde_mm256_broadcastq_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_broadcastss_ps (simde__m128 a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastss_ps(a); - #elif defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_shuffle_ps(a, a, 0); - #else - simde__m128_private r_; - simde__m128_private a_= simde__m128_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { 
- r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastss_ps - #define _mm_broadcastss_ps(a) simde_mm_broadcastss_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcastss_ps (simde__m128 a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastss_ps(a); - #else - simde__m256_private r_; - simde__m128_private a_= simde__m128_to_private(a); - - #if defined(SIMDE_X86_AVX_NATIVE) - __m128 tmp = _mm_permute_ps(a_.n, 0); - r_.n = _mm256_insertf128_ps(_mm256_castps128_ps256(tmp), tmp, 1); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 0, 0, 0, 0, 0, 0, 0); - #elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) - r_.m128[0] = r_.m128[1] = simde_mm_broadcastss_ps(simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastss_ps - #define _mm256_broadcastss_ps(a) simde_mm256_broadcastss_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_broadcastsd_pd (simde__m128d a) { - return simde_mm_movedup_pd(a); -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastsd_pd - #define _mm_broadcastsd_pd(a) simde_mm_broadcastsd_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcastsd_pd (simde__m128d a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastsd_pd(a); - #else - simde__m256d_private r_; - simde__m128d_private a_= simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[0]; - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastsd_pd - #define _mm256_broadcastsd_pd(a) simde_mm256_broadcastsd_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastsi128_si256 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) - return _mm256_broadcastsi128_si256(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i_private[0] = a_; - r_.m128i_private[1] = a_; - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = a_.i64[1]; - r_.i64[2] = a_.i64[0]; - r_.i64[3] = a_.i64[1]; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#define simde_mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastsi128_si256 - #define _mm256_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) - #undef _mm_broadcastsi128_si256 - #define _mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_bslli_epi128 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); - - SIMDE_VECTORIZE - for (int i = 0 ; i < ssize ; i++) { - const int e = i - imm8; - if(i >= (ssize/2)) { - if(e >= (ssize/2) && e < ssize) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - else{ - if(e 
>= 0 && e < (ssize/2)) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_bslli_epi128(a, imm8) _mm256_bslli_epi128(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_bslli_epi128 - #define _mm256_bslli_epi128(a, imm8) simde_mm256_bslli_epi128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_bsrli_epi128 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); - - SIMDE_VECTORIZE - for (int i = 0 ; i < ssize ; i++) { - const int e = i + imm8; - if(i < (ssize/2)) { - if(e >= 0 && e < (ssize/2)) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - else{ - if(e >= (ssize/2) && e < ssize) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_bsrli_epi128(a, imm8) _mm256_bsrli_epi128(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_bsrli_epi128 - #define _mm256_bsrli_epi128(a, imm8) simde_mm256_bsrli_epi128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi8 - #define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi16 - #define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi32 - #define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi64 - #define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi8 - #define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 > b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi16 - #define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi32 - #define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi64 - #define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi16 - #define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi32 - #define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi64 - #define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi16_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi16_epi32 - #define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi16_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi16_epi64 - #define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi32_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi32_epi64 - #define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi16 - #define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi32 - #define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u8[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi64 - #define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu16_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu16_epi32 - #define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu16_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if 
defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu16_epi64 - #define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu32_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu32_epi64 - #define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_extract_epi8 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 31){ - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i8[index]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi8(a, index) _mm256_extract_epi8(a, index) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi8 - #define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_extract_epi16 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 15) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i16[index]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi16(a, index) _mm256_extract_epi16(a, index) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi16 - #define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_extracti128_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[imm8]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extracti128_si256 - #define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i32gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return 
simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i32gather_epi32(base_addr, vindex, scale) _mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_epi32 - #define _mm_i32gather_epi32(base_addr, vindex, scale) simde_mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i32gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { - r_.i32[i] = src_.i32[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_epi32 - #define _mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_i32gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i32gather_epi32(base_addr, vindex, scale) _mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_epi32 - #define _mm256_i32gather_epi32(base_addr, vindex, scale) simde_mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_i32gather_epi32(simde__m256i src, const int32_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 
&& !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - src_ = simde__m256i_to_private(src), - mask_ = simde__m256i_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { - r_.i32[i] = src_.i32[i]; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i32gather_epi32 - #define _mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i64gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i64gather_epi32(base_addr, vindex, scale) _mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i64gather_epi32 - #define _mm_i64gather_epi32(base_addr, vindex, scale) simde_mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { 
- r_.i32[i] = src_.i32[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i64gather_epi32 - #define _mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_i64gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i64gather_epi32(base_addr, vindex, scale) _mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i64gather_epi32 - #define _mm256_i64gather_epi32(base_addr, vindex, scale) simde_mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m256i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128i_private - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { - r_.i32[i] = src_.i32[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i64gather_epi32 - #define _mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), 
vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_epi64 - #define _mm_i32gather_epi64(base_addr, vindex, scale) simde_mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i32gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_epi64 - #define _mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - 
simde__m256i_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_epi64 - #define _mm256_i32gather_epi64(base_addr, vindex, scale) simde_mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_i32gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m128i vindex, simde__m256i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - src_ = simde__m256i_to_private(src), - mask_ = simde__m256i_to_private(mask), - r_; - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i32gather_epi64 - #define _mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i64gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , 
vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i64gather_epi64 - #define _mm_i64gather_epi64(base_addr, vindex, scale) simde_mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i64gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i64gather_epi64 - #define _mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_i64gather_epi64(const int64_t* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define 
simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i64gather_epi64 - #define _mm256_i64gather_epi64(base_addr, vindex, scale) simde_mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_i64gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - src_ = simde__m256i_to_private(src), - mask_ = simde__m256i_to_private(mask), - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i64gather_epi64 - #define _mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_i32gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i32gather_ps(base_addr, vindex, scale) _mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_ps - #define _mm_i32gather_ps(base_addr, vindex, scale) 
simde_mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_i32gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128_private - src_ = simde__m128_to_private(src), - mask_ = simde__m128_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f32[i] = dst; - } - else { - r_.f32[i] = src_.f32[i]; - } - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_ps - #define _mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_i32gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m256_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i32gather_ps(base_addr, vindex, scale) _mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, (base_addr)), (vindex), (scale)) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_ps - #define _mm256_i32gather_ps(base_addr, vindex, scale) simde_mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, (base_addr)), (vindex), (scale)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_i32gather_ps(simde__m256 src, const simde_float32* base_addr, simde__m256i vindex, simde__m256 mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m256_private - src_ = simde__m256_to_private(src), - mask_ = simde__m256_to_private(mask), - r_; - const uint8_t* addr = 
HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f32[i] = dst; - } - else { - r_.f32[i] = src_.f32[i]; - } - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i32gather_ps - #define _mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_i64gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128_private - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i64gather_ps(base_addr, vindex, scale) _mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i64gather_ps - #define _mm_i64gather_ps(base_addr, vindex, scale) simde_mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128_private - src_ = simde__m128_to_private(src), - mask_ = simde__m128_to_private(mask), - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f32[i] = dst; - } - else { - r_.f32[i] = src_.f32[i]; - } - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) 
_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, float32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i64gather_ps - #define _mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_i64gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128_private - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i64gather_ps(base_addr, vindex, scale) _mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i64gather_ps - #define _mm256_i64gather_ps(base_addr, vindex, scale) simde_mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m256i vindex, simde__m128 mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128_private - src_ = simde__m128_to_private(src), - mask_ = simde__m128_to_private(mask), - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f32[i] = dst; - } - else { - r_.f32[i] = src_.f32[i]; - } - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i64gather_ps - #define _mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) - 
SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128d_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f64[i] = dst; - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i32gather_pd(base_addr, vindex, scale) _mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_pd - #define _mm_i32gather_pd(base_addr, vindex, scale) simde_mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mask_i32gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128d_private - src_ = simde__m128d_to_private(src), - mask_ = simde__m128d_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f64[i] = dst; - } - else { - r_.f64[i] = src_.f64[i]; - } - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_pd - #define _mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m256d_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f64[i] = dst; - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i32gather_pd(base_addr, vindex, scale) 
_mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_pd - #define _mm256_i32gather_pd(base_addr, vindex, scale) simde_mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_i32gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m128i vindex, simde__m256d mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256d_private - src_ = simde__m256d_to_private(src), - mask_ = simde__m256d_to_private(mask), - r_; - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f64[i] = dst; - } - else { - r_.f64[i] = src_.f64[i]; - } - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i32gather_pd - #define _mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_i64gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128d_private - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f64[i] = dst; - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i64gather_pd(base_addr, vindex, scale) _mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i64gather_pd - #define _mm_i64gather_pd(base_addr, vindex, scale) simde_mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mask_i64gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = 
simde__m128i_to_private(vindex); - simde__m128d_private - src_ = simde__m128d_to_private(src), - mask_ = simde__m128d_to_private(mask), - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f64[i] = dst; - } - else { - r_.f64[i] = src_.f64[i]; - } - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i64gather_pd - #define _mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_i64gather_pd(const simde_float64* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m256d_private - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f64[i] = dst; - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i64gather_pd(base_addr, vindex, scale) _mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i64gather_pd - #define _mm256_i64gather_pd(base_addr, vindex, scale) simde_mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_i64gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m256i vindex, simde__m256d mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m256d_private - src_ = simde__m256d_to_private(src), - mask_ = simde__m256d_to_private(mask), - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float64 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f64[i] = dst; - } - else { - r_.f64[i] = src_.f64[i]; - } - } - - return 
simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i64gather_pd - #define _mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_inserti128_si256(simde__m256i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - a_.m128i_private[ imm8 & 1 ] = b_; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_inserti128_si256(a, b, imm8) _mm256_inserti128_si256(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_inserti128_si256 - #define _mm256_inserti128_si256(a, b, imm8) simde_mm256_inserti128_si256(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_madd_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_madd_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_madd_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - SIMDE_ALIGN_TO_32 int32_t product SIMDE_VECTOR(64); - SIMDE_ALIGN_TO_32 int32_t a32x16 SIMDE_VECTOR(64); - SIMDE_ALIGN_TO_32 int32_t b32x16 SIMDE_VECTOR(64); - SIMDE_ALIGN_TO_32 int32_t even SIMDE_VECTOR(32); - SIMDE_ALIGN_TO_32 int32_t odd SIMDE_VECTOR(32); - - SIMDE_CONVERT_VECTOR_(a32x16, a_.i16); - SIMDE_CONVERT_VECTOR_(b32x16, b_.i16); - product = a32x16 * b32x16; - - even = __builtin_shufflevector(product, product, 0, 2, 4, 6, 8, 10, 12, 14); - odd = __builtin_shufflevector(product, product, 1, 3, 5, 7, 9, 11, 13, 15); - - r_.i32 = even + odd; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_madd_epi16 - #define _mm256_madd_epi16(a, b) simde_mm256_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maddubs_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_maddubs_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_maddubs_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_maddubs_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); 
- r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_maddubs_epi16 - #define _mm256_maddubs_epi16(a, b) simde_mm256_maddubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_maskload_epi32(mem_addr, mask); - #else - simde__m128i_private - r_, - mask_ = simde__m128i_to_private(mask), - mask_shr_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - mask_shr_.i32[i] = mask_.i32[i] >> 31; - } - #endif - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = mask_shr_.i32[i] ? mem_addr[i] : INT32_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_maskload_epi32 - #define _mm_maskload_epi32(mem_addr, mask) simde_mm_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_maskload_epi32(mem_addr, mask); - #else - simde__m256i_private - mask_ = simde__m256i_to_private(mask), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (mask_.i32[i] >> 31) ? mem_addr[i] : INT32_C(0); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskload_epi32 - #define _mm256_maskload_epi32(mem_addr, mask) simde_mm256_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); - #else - simde__m128i_private - r_, - mask_ = simde__m128i_to_private(mask), - mask_shr_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { - mask_shr_.i64[i] = mask_.i64[i] >> 63; - } - #endif - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = mask_shr_.i64[i] ? 
mem_addr[i] : INT64_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_maskload_epi64 - #define _mm_maskload_epi64(mem_addr, mask) simde_mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); - #else - simde__m256i_private - mask_ = simde__m256i_to_private(mask), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (mask_.i64[i] >> 63) ? mem_addr[i] : INT64_C(0); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskload_epi64 - #define _mm256_maskload_epi64(mem_addr, mask) simde_mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - _mm_maskstore_epi32(mem_addr, mask, a); - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.i32[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_epi32 - #define _mm_maskstore_epi32(mem_addr, mask, a) simde_mm_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - _mm256_maskstore_epi32(mem_addr, mask, a); - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256i_private a_ = simde__m256i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.i32[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_epi32 - #define _mm256_maskstore_epi32(mem_addr, mask, a) simde_mm256_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - _mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); - #else - simde__m128i_private mask_ = simde__m128i_to_private(mask); - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - if (mask_.u64[i] >> 63) - mem_addr[i] = a_.i64[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_epi64 - #define _mm_maskstore_epi64(mem_addr, mask, a) simde_mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - 
_mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256i_private a_ = simde__m256i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - if (mask_.u64[i] & (UINT64_C(1) << 63)) - mem_addr[i] = a_.i64[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_epi64 - #define _mm256_maskstore_epi64(mem_addr, mask, a) simde_mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_max_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) - return _mm256_max_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_max_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_max_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_epi8 - #define _mm256_max_epi8(a, b) simde_mm256_max_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_max_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_max_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_max_epu8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_max_epu8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_epu8 - #define _mm256_max_epu8(a, b) simde_mm256_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_max_epu16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_max_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_max_epu16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_max_epu16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? 
a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_epu16 - #define _mm256_max_epu16(a, b) simde_mm256_max_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_max_epu32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_max_epu32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_max_epu32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_max_epu32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_epu32 - #define _mm256_max_epu32(a, b) simde_mm256_max_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_max_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_max_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_max_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_max_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_epi16 - #define _mm256_max_epi16(a, b) simde_mm256_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_max_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_max_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_max_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_max_epi32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_epi32 - #define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_min_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) - return _mm256_min_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_min_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_min_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_epi8 - #define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_min_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_min_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_min_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_min_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_epi16 - #define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_min_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_min_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_min_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_min_epi32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_epi32 - #define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_min_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_min_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_min_epu8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_min_epu8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_epu8 - #define _mm256_min_epu8(a, b) simde_mm256_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_min_epu16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_min_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_min_epu16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_min_epu16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? 
a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_epu16 - #define _mm256_min_epu16(a, b) simde_mm256_min_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_min_epu32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_min_epu32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_min_epu32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_min_epu32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_epu32 - #define _mm256_min_epu32(a, b) simde_mm256_min_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm256_movemask_epi8 (simde__m256i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_movemask_epi8(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - uint32_t r = 0; - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(uint32_t,simde_mm_movemask_epi8(a_.m128i[i])) << (16 * i); - } - #else - r = 0; - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(uint32_t, (a_.u8[31 - i] >> 7)) << (31 - i); - } - #endif - - return HEDLEY_STATIC_CAST(int32_t, r); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_epi8 - #define _mm256_movemask_epi8(a) simde_mm256_movemask_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mpsadbw_epu8 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - const int a_offset1 = imm8 & 4; - const int b_offset1 = (imm8 & 3) << 2; - const int a_offset2 = (imm8 >> 3) & 4; - const int b_offset2 = ((imm8 >> 3) & 3) << 2; - - #if defined(simde_math_abs) - const int halfway_point = HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0])) ) / 2; - for (int i = 0 ; i < halfway_point ; i++) { - r_.u16[i] = - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 0] - b_.u8[b_offset1 + 0]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 1] - b_.u8[b_offset1 + 1]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 2] - b_.u8[b_offset1 + 2]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 3] - b_.u8[b_offset1 + 3]))); - r_.u16[halfway_point + i] = - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 0] - b_.u8[2 * halfway_point + b_offset2 + 0]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 1] - b_.u8[2 * halfway_point + b_offset2 + 1]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 2] - b_.u8[2 * halfway_point + b_offset2 + 2]))) + - 
HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 3] - b_.u8[2 * halfway_point + b_offset2 + 3]))); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) - #define simde_mm256_mpsadbw_epu8(a, b, imm8) _mm256_mpsadbw_epu8(a, b, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - #define simde_mm256_mpsadbw_epu8(a, b, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8 >> 3)), \ - simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mpsadbw_epu8 - #define _mm256_mpsadbw_epu8(a, b, imm8) simde_mm256_mpsadbw_epu8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mul_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mul_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_mul_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_mul_epi32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = - HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * - HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_mul_epi32(a, b) simde_mm256_mul_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mul_epu32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mul_epu32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_mul_epu32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_mul_epu32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_mul_epu32(a, b) simde_mm256_mul_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mulhi_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mulhi_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_mulhi_epi16(a, b) simde_mm256_mulhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mulhi_epu16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mulhi_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = 
simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_mulhi_epu16(a, b) simde_mm256_mulhi_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mulhrs_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mulhrs_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_mulhrs_epi16(a, b) simde_mm256_mulhrs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mullo_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mullo_epi16(a, b); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mullo_epi16 - #define _mm256_mullo_epi16(a, b) simde_mm256_mullo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mullo_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mullo_epi32(a, b); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mullo_epi32 - #define _mm256_mullo_epi32(a, b) simde_mm256_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_mullo_epu32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 * b_.u32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] * b_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_or_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_or_si256(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_or_si128(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_or_si128(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) 
; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_si256 - #define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_packs_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_packs_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_packs_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_packs_epi16(a_.m128i[1], b_.m128i[1]); - #else - const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/2; - const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/4; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); - r_.i8[i + quarter_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i])); - r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i])); - r_.i8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i])); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_packs_epi16 - #define _mm256_packs_epi16(a, b) simde_mm256_packs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_packs_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_packs_epi32(a, b); - #else - simde__m256i_private - r_, - v_[] = { - simde__m256i_to_private(a), - simde__m256i_to_private(b) - }; - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_packs_epi32(v_[0].m128i[0], v_[1].m128i[0]); - r_.m128i[1] = simde_mm_packs_epi32(v_[0].m128i[1], v_[1].m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int32_t v = v_[(i >> 2) & 1].i32[(i & 11) - ((i & 8) >> 1)]; - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (v > INT16_MAX) ? INT16_MAX : ((v < INT16_MIN) ? INT16_MIN : v)); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_packs_epi32 - #define _mm256_packs_epi32(a, b) simde_mm256_packs_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_packus_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_packus_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_packus_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_packus_epi16(a_.m128i[1], b_.m128i[1]); - #else - const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; - const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? 
UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i])); - r_.u8[i + quarter_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i])); - r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i])); - r_.u8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i])); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_packus_epi16 - #define _mm256_packus_epi16(a, b) simde_mm256_packus_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_packus_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_packus_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_packus_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_packus_epi32(a_.m128i[1], b_.m128i[1]); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i])); - r_.u16[i + quarter_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i])); - r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i])); - r_.u16[halfway_point + i + quarter_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + i])); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_packus_epi32 - #define _mm256_packus_epi32(a, b) simde_mm256_packus_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); - r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2x128_si256 - #define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permute4x64_epi64 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - r_.i64[0] = (imm8 & 0x02) ? a_.i64[((imm8 ) & 1)+2] : a_.i64[(imm8 ) & 1]; - r_.i64[1] = (imm8 & 0x08) ? a_.i64[((imm8 >> 2 ) & 1)+2] : a_.i64[(imm8 >> 2 ) & 1]; - r_.i64[2] = (imm8 & 0x20) ? a_.i64[((imm8 >> 4 ) & 1)+2] : a_.i64[(imm8 >> 4 ) & 1]; - r_.i64[3] = (imm8 & 0x80) ? a_.i64[((imm8 >> 6 ) & 1)+2] : a_.i64[(imm8 >> 6 ) & 1]; - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_permute4x64_epi64(a, imm8) _mm256_permute4x64_epi64(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute4x64_epi64 - #define _mm256_permute4x64_epi64(a, imm8) simde_mm256_permute4x64_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute4x64_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - r_.f64[0] = (imm8 & 0x02) ? a_.f64[((imm8 ) & 1)+2] : a_.f64[(imm8 ) & 1]; - r_.f64[1] = (imm8 & 0x08) ? a_.f64[((imm8 >> 2 ) & 1)+2] : a_.f64[(imm8 >> 2 ) & 1]; - r_.f64[2] = (imm8 & 0x20) ? a_.f64[((imm8 >> 4 ) & 1)+2] : a_.f64[(imm8 >> 4 ) & 1]; - r_.f64[3] = (imm8 & 0x80) ? 
a_.f64[((imm8 >> 6 ) & 1)+2] : a_.f64[(imm8 >> 6 ) & 1]; - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_permute4x64_pd(a, imm8) _mm256_permute4x64_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute4x64_pd - #define _mm256_permute4x64_pd(a, imm8) simde_mm256_permute4x64_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutevar8x32_epi32 (simde__m256i a, simde__m256i idx) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_permutevar8x32_epi32(a, idx); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - idx_ = simde__m256i_to_private(idx); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[idx_.i32[i] & 7]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar8x32_epi32 - #define _mm256_permutevar8x32_epi32(a, idx) simde_mm256_permutevar8x32_epi32(a, idx) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permutevar8x32_ps (simde__m256 a, simde__m256i idx) { - #if defined(SIMDE_X86_AVX2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm256_permutevar8x32_ps(a, HEDLEY_REINTERPRET_CAST(simde__m256, idx)); - #else - return _mm256_permutevar8x32_ps(a, idx); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - simde__m256i_private - idx_ = simde__m256i_to_private(idx); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[idx_.i32[i] & 7]; - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar8x32_ps - #define _mm256_permutevar8x32_ps(a, idx) simde_mm256_permutevar8x32_ps(a, idx) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sad_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sad_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sad_epu8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sad_epu8(a_.m128i[1], b_.m128i[1]); - #else - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - uint16_t tmp = 0; - SIMDE_VECTORIZE_REDUCTION(+:tmp) - for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 4) ; j++) { - const size_t e = j + (i * 8); - tmp += (a_.u8[e] > b_.u8[e]) ? (a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); - } - r_.i64[i] = tmp; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sad_epu8 - #define _mm256_sad_epu8(a, b) simde_mm256_sad_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_shuffle_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_shuffle_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; i++) { - r_.u8[ i ] = (b_.u8[ i ] & 0x80) ? 
0 : a_.u8[(b_.u8[ i ] & 0x0f) ]; - r_.u8[i + 16] = (b_.u8[i + 16] & 0x80) ? 0 : a_.u8[(b_.u8[i + 16] & 0x0f) + 16]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_epi8 - #define _mm256_shuffle_epi8(a, b) simde_mm256_shuffle_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_shuffle_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[i + 4] = a_.i32[((imm8 >> (i * 2)) & 3) + 4]; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_shuffle_epi32(a, imm8) _mm256_shuffle_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) -# define simde_mm256_shuffle_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm256_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ - simde__m256i_from_private((simde__m256i_private) { .i32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 32, \ - (simde_tmp_a_).i32, \ - (simde_tmp_a_).i32, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_epi32 - #define _mm256_shuffle_epi32(a, imm8) simde_mm256_shuffle_epi32(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_shufflehi_epi16(a, imm8) _mm256_shufflehi_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_shufflehi_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm256_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ - simde__m256i_from_private((simde__m256i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 32, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4, \ - 8, 9, 10, 11, \ - ((((imm8) ) & 3) + 8 + 4), \ - ((((imm8) >> 2) & 3) + 8 + 4), \ - ((((imm8) >> 4) & 3) + 8 + 4), \ - ((((imm8) >> 6) & 3) + 8 + 4) \ - ) }); })) -#else -# define simde_mm256_shufflehi_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ - simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_shufflehi_epi16 - #define _mm256_shufflehi_epi16(a, imm8) simde_mm256_shufflehi_epi16(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_shufflelo_epi16(a, imm8) _mm256_shufflelo_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define 
simde_mm256_shufflelo_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm256_shufflelo_epi16(a, imm8) (__extension__ ({ \ - const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ - simde__m256i_from_private((simde__m256i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 32, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), \ - 4, 5, 6, 7, \ - ((((imm8) ) & 3) + 8), \ - ((((imm8) >> 2) & 3) + 8), \ - ((((imm8) >> 4) & 3) + 8), \ - ((((imm8) >> 6) & 3) + 8), \ - 12, 13, 14, 15) }); })) -#else -# define simde_mm256_shufflelo_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ - simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_shufflelo_epi16 - #define _mm256_shufflelo_epi16(a, imm8) simde_mm256_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sign_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sign_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] == INT8_C(0)) ? INT8_C(0) : (b_.i8[i] < INT8_C(0)) ? -a_.i8[i] : a_.i8[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sign_epi8 - #define _mm256_sign_epi8(a, b) simde_mm256_sign_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sign_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sign_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] == INT16_C(0)) ? INT16_C(0) : (b_.i16[i] < INT16_C(0)) ? -a_.i16[i] : a_.i16[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sign_epi16 - #define _mm256_sign_epi16(a, b) simde_mm256_sign_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sign_epi32(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sign_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (b_.i32[i] == INT32_C(0)) ? INT32_C(0) : (b_.i32[i] < INT32_C(0)) ? 
-a_.i32[i] : a_.i32[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sign_epi32 - #define _mm256_sign_epi32(a, b) simde_mm256_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sll_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sll_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sll_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sll_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 15) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift)); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sll_epi16 - #define _mm256_sll_epi16(a, count) simde_mm256_sll_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sll_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sll_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sll_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sll_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 31) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift)); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sll_epi32 - #define _mm256_sll_epi32(a, count) simde_mm256_sll_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sll_epi64 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sll_epi64(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sll_epi64(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sll_epi64(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 63) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift)); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sll_epi64 - #define _mm256_sll_epi64(a, count) simde_mm256_sll_epi64(a, count) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_epi16 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* Note: There is no consistency in how compilers handle values outside of - the expected range, hence the discrepancy between what we allow and what - Intel specifies. Some compilers will return 0, others seem to just mask - off everything outside of the range. */ - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_i16) / sizeof(a_.altivec_i16[0])) ; i++) { - r_.altivec_i16[i] = vec_sl(a_.altivec_i16[i], sv); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)); - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_slli_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_epi16 - #define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_i32) / sizeof(a_.altivec_i32[0])) ; i++) { - r_.altivec_i32[i] = vec_sl(a_.altivec_i32[i], sv); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_epi32(a, imm8) _mm256_slli_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_slli_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_epi32 - #define _mm256_slli_epi32(a, imm8) simde_mm256_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_epi64 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8) -#elif 
SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_slli_epi64(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_epi64 - #define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { - const int e = HEDLEY_STATIC_CAST(int, i) - imm8; - r_.m128i_private[h].i8[i] = (e >= 0) ? a_.m128i_private[h].i8[e] : 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_si256(a, imm8) _mm256_slli_si256(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) -# define simde_mm256_slli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm256_slli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_si256 - #define _mm256_slli_si256(a, imm8) simde_mm256_slli_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sllv_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vreinterpretq_s32_u32(b_.neon_u32)); - r_.neon_u32 = vandq_u32(r_.neon_u32, vcltq_u32(b_.neon_u32, vdupq_n_u32(32))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < UINT32_C(32))) & (a_.u32 << b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] << b_.u32[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_sllv_epi32(a, b) _mm_sllv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_sllv_epi32 - #define _mm_sllv_epi32(a, b) simde_mm_sllv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sllv_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sllv_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sllv_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 << b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] << b_.u32[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_sllv_epi32(a, b) _mm256_sllv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sllv_epi32 - #define _mm256_sllv_epi32(a, b) simde_mm256_sllv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sllv_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vreinterpretq_s64_u64(b_.neon_u64)); - r_.neon_u64 = vandq_u64(r_.neon_u64, vcltq_u64(b_.neon_u64, vdupq_n_u64(64))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] << b_.u64[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_sllv_epi64(a, b) _mm_sllv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_sllv_epi64 - #define _mm_sllv_epi64(a, b) simde_mm_sllv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sllv_epi64 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sllv_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sllv_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] << b_.u64[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_sllv_epi64(a, b) _mm256_sllv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sllv_epi64 - #define _mm256_sllv_epi64(a, b) simde_mm256_sllv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sra_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sra_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sra_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sra_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sra_epi16 - #define _mm256_sra_epi16(a, count) simde_mm256_sra_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sra_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sra_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sra_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sra_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - - if (shift > 31) shift = 31; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sra_epi32 - #define _mm256_sra_epi32(a, count) simde_mm256_sra_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srai_epi16 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srai_epi16(a, imm8) _mm256_srai_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srai_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srai_epi16 - #define _mm256_srai_epi16(a, imm8) simde_mm256_srai_epi16(a, imm8) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srai_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); - - if (shift > 31) shift = 31; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srai_epi32(a, imm8) _mm256_srai_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srai_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srai_epi32 - #define _mm256_srai_epi32(a, imm8) simde_mm256_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srav_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_srav_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t cnt = vreinterpretq_s32_u32(vminq_u32(count_.neon_u32, vdupq_n_u32(31))); - r_.neon_i32 = vshlq_s32(a_.neon_i32, vnegq_s32(cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); - r_.i32[i] = a_.i32[i] >> HEDLEY_STATIC_CAST(int, shift > 31 ? 31 : shift); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_srav_epi32 - #define _mm_srav_epi32(a, count) simde_mm_srav_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srav_epi32 (simde__m256i a, simde__m256i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srav_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - count_ = simde__m256i_to_private(count); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srav_epi32(a_.m128i[0], count_.m128i[0]); - r_.m128i[1] = simde_mm_srav_epi32(a_.m128i[1], count_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); - if (shift > 31) shift = 31; - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srav_epi32 - #define _mm256_srav_epi32(a, count) simde_mm256_srav_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 16 ? 
16 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi16 - #define _mm256_srl_epi16(a, count) simde_mm256_srl_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 32 ? 32 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi32 - #define _mm256_srl_epi32(a, count) simde_mm256_srl_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi64 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi64(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi64(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi64(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 64 ? 
64 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(64, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi64 - #define _mm256_srl_epi64(a, count) simde_mm256_srl_epi64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_epi16 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - if (imm8 > 15) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_u16) / sizeof(a_.altivec_u16[0])) ; i++) { - r_.altivec_u16[i] = vec_sr(a_.altivec_u16[i], sv); - } - #else - if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) { - simde_memset(&r_, 0, sizeof(r_)); - } else { - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> imm8; - } - #endif - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_epi16(a, imm8) _mm256_srli_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srli_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_epi16 - #define _mm256_srli_epi16(a, imm8) simde_mm256_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_u32) / sizeof(a_.altivec_u32[0])) ; i++) { - r_.altivec_u32[i] = vec_sr(a_.altivec_u32[i], sv); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> imm8; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srli_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_epi32 - #define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_epi64 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - 
r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, imm8); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srli_epi64(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_epi64 - #define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { - const int e = imm8 + HEDLEY_STATIC_CAST(int, i); - r_.m128i_private[h].i8[i] = (e < 16) ? a_.m128i_private[h].i8[e] : 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) -# define simde_mm256_srli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm256_srli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_si256 - #define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srlv_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_srlv_epi32(a, b) _mm_srlv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_srlv_epi32 - #define _mm_srlv_epi32(a, b) simde_mm_srlv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srlv_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] >> b_.u32[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_srlv_epi32(a, b) _mm256_srlv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srlv_epi32 - #define _mm256_srlv_epi32(a, b) simde_mm256_srlv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srlv_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_srlv_epi64(a, b) _mm_srlv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_srlv_epi64 - #define _mm_srlv_epi64(a, b) simde_mm_srlv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srlv_epi64 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_srlv_epi64(a, b) _mm256_srlv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srlv_epi64 - #define _mm256_srlv_epi64(a, b) simde_mm256_srlv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_stream_load_si256 (const simde__m256i* mem_addr) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_stream_load_si256(HEDLEY_CONST_CAST(simde__m256i*, mem_addr)); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - return __builtin_nontemporal_load(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_stream_load_si256(mem_addr) simde_mm256_stream_load_si256(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi8 - #define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) { - #if 
defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi16 - #define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hsub_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hsub_epi16(a, b); - #else - return simde_mm256_sub_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_epi16 - #define _mm256_hsub_epi16(a, b) simde_mm256_hsub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi32 - #define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hsub_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hsub_epi32(a, b); - #else - return simde_mm256_sub_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_epi32 - #define _mm256_hsub_epi32(a, b) simde_mm256_hsub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi64 - #define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_sub_epu32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ 
= simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_sub_epu32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_sub_epu32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_subs_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epi8 - #define _mm256_subs_epi8(a, b) simde_mm256_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epi16(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_subs_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epi16 - #define _mm256_subs_epi16(a, b) simde_mm256_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hsubs_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hsubs_epi16(a, b); - #else - return simde_mm256_subs_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsubs_epi16 - #define _mm256_hsubs_epi16(a, b) simde_mm256_hsubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_subs_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epu8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epu8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epu8 - #define _mm256_subs_epu8(a, b) simde_mm256_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epu16(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - 
return _mm256_subs_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epu16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epu16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epu16 - #define _mm256_subs_epu16(a, b) simde_mm256_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_x_mm256_test_all_ones (simde__m256i a) { - simde__m256i_private a_ = simde__m256i_to_private(a); - int r; - int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(&:r_) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r_ &= a_.i32f[i]; - } - - r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); - - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, - 0, 32, 1, 33, 2, 34, 3, 35, - 4, 36, 5, 37, 6, 38, 7, 39, - 16, 48, 17, 49, 18, 50, 19, 51, - 20, 52, 21, 53, 22, 54, 23, 55); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { - r_.i8[2 * i] = a_.i8[i + ~(~i | 7)]; - r_.i8[2 * i + 1] = b_.i8[i + ~(~i | 7)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi8 - #define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, - 0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { - r_.i16[2 * i] = a_.i16[i + ~(~i | 3)]; - r_.i16[2 * i + 1] = b_.i16[i + ~(~i | 3)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi16 - #define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if 
SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, - 0, 8, 1, 9, 4, 12, 5, 13); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { - r_.i32[2 * i] = a_.i32[i + ~(~i | 1)]; - r_.i32[2 * i + 1] = b_.i32[i + ~(~i | 1)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi32 - #define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 0, 4, 2, 6); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { - r_.i64[2 * i] = a_.i64[2 * i]; - r_.i64[2 * i + 1] = b_.i64[2 * i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi64 - #define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, - 8, 40, 9, 41, 10, 42, 11, 43, - 12, 44, 13, 45, 14, 46, 15, 47, - 24, 56, 25, 57, 26, 58, 27, 59, - 28, 60, 29, 61, 30, 62, 31, 63); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { - r_.i8[2 * i] = a_.i8[i + 8 + ~(~i | 7)]; - r_.i8[2 * i + 1] = b_.i8[i + 8 + ~(~i | 7)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_epi8 - #define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, - 4, 20, 5, 21, 6, 22, 7, 23, - 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { - r_.i16[2 * i] = a_.i16[i 
+ 4 + ~(~i | 3)]; - r_.i16[2 * i + 1] = b_.i16[i + 4 + ~(~i | 3)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_epi16 - #define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, - 2, 10, 3, 11, 6, 14, 7, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { - r_.i32[2 * i] = a_.i32[i + 2 + ~(~i | 1)]; - r_.i32[2 * i + 1] = b_.i32[i + 2 + ~(~i | 1)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_epi32 - #define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 1, 5, 3, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { - r_.i64[2 * i] = a_.i64[2 * i + 1]; - r_.i64[2 * i + 1] = b_.i64[2 * i + 1]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_epi64 - #define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_xor_si256(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ b_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_si256 - #define _mm256_xor_si256(a, b) simde_mm256_xor_si256(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX2_H) */ -/* :: End simde/x86/avx2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/mov.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy 
of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_MOV_H) -#define SIMDE_X86_AVX512_MOV_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/cast.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_CAST_H) -#define SIMDE_X86_AVX512_CAST_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_castpd_ps (simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd_ps(a); - #else - simde__m512 r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd_ps - #define _mm512_castpd_ps(a) simde_mm512_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_castpd_si512 (simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd_si512(a); - #else - simde__m512i r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd_si512 - #define _mm512_castpd_si512(a) simde_mm512_castpd_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_castps_pd (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps_pd(a); - #else - simde__m512d r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps_pd - #define _mm512_castps_pd(a) simde_mm512_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_castps_si512 (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps_si512(a); - #else - simde__m512i r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps_si512 - #define _mm512_castps_si512(a) simde_mm512_castps_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_castph_si512 (simde__m512h a) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_castph_si512(a); - #else - simde__m512i r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_castph_si512 - #define _mm512_castph_si512(a) simde_mm512_castph_si512(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_castsi512_ph (simde__m512i a) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_castsi512_ph(a); - #else - simde__m512h r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_ph - #define _mm512_castsi512_ph(a) simde_mm512_castsi512_ph(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_castsi512_ps (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi512_ps(a); - #else - simde__m512 r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_ps - #define _mm512_castsi512_ps(a) simde_mm512_castsi512_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_castsi512_pd (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi512_pd(a); - #else - simde__m512d r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_pd - #define _mm512_castsi512_pd(a) simde_mm512_castsi512_pd(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_castpd128_pd512 (simde__m128d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd128_pd512(a); - #else - simde__m512d_private r_; - r_.m128d[0] = a; - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd128_pd512 - #define _mm512_castpd128_pd512(a) simde_mm512_castpd128_pd512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_castpd256_pd512 (simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd256_pd512(a); - #else - simde__m512d_private r_; - r_.m256d[0] = a; - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd256_pd512 - #define _mm512_castpd256_pd512(a) simde_mm512_castpd256_pd512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm512_castpd512_pd128 (simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd512_pd128(a); - #else - simde__m512d_private a_ = simde__m512d_to_private(a); - return a_.m128d[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd512_pd128 - #define _mm512_castpd512_pd128(a) simde_mm512_castpd512_pd128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm512_castpd512_pd256 (simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd512_pd256(a); - #else - simde__m512d_private a_ = simde__m512d_to_private(a); - return a_.m256d[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd512_pd256 - #define _mm512_castpd512_pd256(a) simde_mm512_castpd512_pd256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_castps128_ps512 (simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps128_ps512(a); - #else - simde__m512_private r_; - r_.m128[0] = a; - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps128_ps512 - #define _mm512_castps128_ps512(a) simde_mm512_castps128_ps512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_castps256_ps512 (simde__m256 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps256_ps512(a); - #else - simde__m512_private r_; - r_.m256[0] = a; - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps256_ps512 - #define _mm512_castps256_ps512(a) simde_mm512_castps256_ps512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm512_castps512_ps128 (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps512_ps128(a); - #else - simde__m512_private a_ = simde__m512_to_private(a); - return a_.m128[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps512_ps128 - #define _mm512_castps512_ps128(a) simde_mm512_castps512_ps128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm512_castps512_ps256 (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps512_ps256(a); - #else - simde__m512_private a_ = simde__m512_to_private(a); - return a_.m256[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps512_ps256 - #define _mm512_castps512_ps256(a) simde_mm512_castps512_ps256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_castsi128_si512 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - 
return _mm512_castsi128_si512(a); - #else - simde__m512i_private r_; - r_.m128i[0] = a; - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi128_si512 - #define _mm512_castsi128_si512(a) simde_mm512_castsi128_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_castsi256_si512 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi256_si512(a); - #else - simde__m512i_private r_; - r_.m256i[0] = a; - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi256_si512 - #define _mm512_castsi256_si512(a) simde_mm512_castsi256_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_castsi512_si128 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi512_si128(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - return a_.m128i[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_si128 - #define _mm512_castsi512_si128(a) simde_mm512_castsi512_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_castsi512_si256 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi512_si256(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - return a_.m256i[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_si256 - #define _mm512_castsi512_si256(a) simde_mm512_castsi512_si256(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_CAST_H) */ -/* :: End simde/x86/avx512/cast.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/set.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_SET_H) -#define SIMDE_X86_AVX512_SET_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/load.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_LOAD_H) -#define SIMDE_X86_AVX512_LOAD_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_load_pd (void const * mem_addr) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_load_pd(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d)); - #else - simde__m512d r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_load_pd - #define _mm512_load_pd(a) simde_mm512_load_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_load_ps (void const * mem_addr) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_load_ps(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512)); - #else - simde__m512 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_load_ps - #define _mm512_load_ps(a) simde_mm512_load_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_load_ph (void const * mem_addr) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_load_ph(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512h)); - #else - simde__m512h r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512h), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_load_ph - #define _mm512_load_ph(a) simde_mm512_load_ph(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_load_si512 (void const * mem_addr) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_load_si512(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i)); - #else - simde__m512i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i), sizeof(r)); - return r; - #endif -} 
-#define simde_mm512_load_epi8(mem_addr) simde_mm512_load_si512(mem_addr) -#define simde_mm512_load_epi16(mem_addr) simde_mm512_load_si512(mem_addr) -#define simde_mm512_load_epi32(mem_addr) simde_mm512_load_si512(mem_addr) -#define simde_mm512_load_epi64(mem_addr) simde_mm512_load_si512(mem_addr) -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_load_epi8 - #undef _mm512_load_epi16 - #undef _mm512_load_epi32 - #undef _mm512_load_epi64 - #undef _mm512_load_si512 - #define _mm512_load_si512(a) simde_mm512_load_si512(a) - #define _mm512_load_epi8(a) simde_mm512_load_si512(a) - #define _mm512_load_epi16(a) simde_mm512_load_si512(a) - #define _mm512_load_epi32(a) simde_mm512_load_si512(a) - #define _mm512_load_epi64(a) simde_mm512_load_si512(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_LOAD_H) */ -/* :: End simde/x86/avx512/load.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set_epi16 (int16_t e31, int16_t e30, int16_t e29, int16_t e28, int16_t e27, int16_t e26, int16_t e25, int16_t e24, - int16_t e23, int16_t e22, int16_t e21, int16_t e20, int16_t e19, int16_t e18, int16_t e17, int16_t e16, - int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - simde__m512i_private r_; - - r_.i16[ 0] = e0; - r_.i16[ 1] = e1; - r_.i16[ 2] = e2; - r_.i16[ 3] = e3; - r_.i16[ 4] = e4; - r_.i16[ 5] = e5; - r_.i16[ 6] = e6; - r_.i16[ 7] = e7; - r_.i16[ 8] = e8; - r_.i16[ 9] = e9; - r_.i16[10] = e10; - r_.i16[11] = e11; - r_.i16[12] = e12; - r_.i16[13] = e13; - r_.i16[14] = e14; - r_.i16[15] = e15; - r_.i16[16] = e16; - r_.i16[17] = e17; - r_.i16[18] = e18; - r_.i16[19] = e19; - r_.i16[20] = e20; - r_.i16[21] = e21; - r_.i16[22] = e22; - r_.i16[23] = e23; - r_.i16[24] = e24; - r_.i16[25] = e25; - r_.i16[26] = e26; - r_.i16[27] = e27; - r_.i16[28] = e28; - r_.i16[29] = e29; - r_.i16[30] = e30; - r_.i16[31] = e31; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_epi16 - #define _mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8, - int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - simde__m512i_private r_; - - r_.i32[ 0] = e0; - r_.i32[ 1] = e1; - r_.i32[ 2] = e2; - r_.i32[ 3] = e3; - r_.i32[ 4] = e4; - r_.i32[ 5] = e5; - r_.i32[ 6] = e6; - r_.i32[ 7] = e7; - r_.i32[ 8] = e8; - r_.i32[ 9] = e9; - r_.i32[10] = e10; - r_.i32[11] = e11; - r_.i32[12] = e12; - r_.i32[13] = e13; - r_.i32[14] = e14; - r_.i32[15] = e15; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_epi32 - #define _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - simde__m512i_private r_; - - r_.i64[0] = e0; - r_.i64[1] = e1; - r_.i64[2] = e2; - r_.i64[3] = e3; - r_.i64[4] = e4; - r_.i64[5] = e5; - r_.i64[6] = e6; - r_.i64[7] = e7; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_epi64 - #define _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_epu8 (uint8_t e63, uint8_t e62, uint8_t e61, uint8_t e60, uint8_t e59, uint8_t e58, uint8_t e57, uint8_t e56, - uint8_t e55, uint8_t e54, uint8_t e53, uint8_t e52, uint8_t e51, uint8_t e50, uint8_t e49, uint8_t e48, - uint8_t e47, uint8_t e46, uint8_t e45, uint8_t e44, uint8_t e43, uint8_t e42, uint8_t e41, uint8_t e40, - uint8_t e39, uint8_t e38, uint8_t e37, uint8_t e36, uint8_t e35, uint8_t e34, uint8_t e33, uint8_t e32, - uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, - uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, - uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m512i_private r_; - - r_.u8[ 0] = e0; - r_.u8[ 1] = e1; - r_.u8[ 2] = e2; - r_.u8[ 3] = e3; - r_.u8[ 4] = e4; - r_.u8[ 5] = e5; - r_.u8[ 6] = e6; - r_.u8[ 7] = e7; - r_.u8[ 8] = e8; - r_.u8[ 9] = e9; - r_.u8[10] = e10; - r_.u8[11] = e11; - r_.u8[12] = e12; - r_.u8[13] = e13; - r_.u8[14] = e14; - r_.u8[15] = e15; - r_.u8[16] = e16; - r_.u8[17] = e17; - r_.u8[18] = e18; - r_.u8[19] = e19; - r_.u8[20] = e20; - r_.u8[21] = e21; - r_.u8[22] = e22; - r_.u8[23] = e23; - r_.u8[24] = e24; - r_.u8[25] = e25; - r_.u8[26] = e26; - r_.u8[27] = e27; - r_.u8[28] = e28; - r_.u8[29] = e29; - r_.u8[30] = e30; - r_.u8[31] = e31; - r_.u8[32] = e32; - r_.u8[33] = e33; - r_.u8[34] = e34; - r_.u8[35] = e35; - r_.u8[36] = e36; - r_.u8[37] = e37; - r_.u8[38] = e38; - r_.u8[39] = e39; - r_.u8[40] = e40; - r_.u8[41] = e41; - r_.u8[42] = e42; - r_.u8[43] = e43; - r_.u8[44] = e44; - r_.u8[45] = e45; - r_.u8[46] = e46; - r_.u8[47] = e47; - r_.u8[48] = e48; - r_.u8[49] = e49; - r_.u8[50] = e50; - r_.u8[51] = e51; - r_.u8[52] = e52; - r_.u8[53] = e53; - r_.u8[54] = e54; - r_.u8[55] = e55; - r_.u8[56] = e56; - r_.u8[57] = e57; - r_.u8[58] = e58; - r_.u8[59] = e59; - r_.u8[60] = e60; - r_.u8[61] = e61; - r_.u8[62] = e62; - r_.u8[63] = e63; - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_epu16 (uint16_t e31, uint16_t e30, uint16_t e29, uint16_t e28, uint16_t e27, uint16_t e26, uint16_t e25, uint16_t e24, - uint16_t e23, uint16_t e22, uint16_t e21, uint16_t e20, uint16_t e19, uint16_t e18, uint16_t e17, uint16_t e16, - uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, - uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m512i_private r_; - - r_.u16[ 0] = e0; - r_.u16[ 1] = e1; - r_.u16[ 2] = e2; - r_.u16[ 3] = e3; - r_.u16[ 4] = e4; - r_.u16[ 5] = e5; - r_.u16[ 6] = e6; - r_.u16[ 7] = e7; - r_.u16[ 8] = e8; - r_.u16[ 9] = e9; - r_.u16[10] = e10; - r_.u16[11] = e11; - r_.u16[12] 
= e12; - r_.u16[13] = e13; - r_.u16[14] = e14; - r_.u16[15] = e15; - r_.u16[16] = e16; - r_.u16[17] = e17; - r_.u16[18] = e18; - r_.u16[19] = e19; - r_.u16[20] = e20; - r_.u16[21] = e21; - r_.u16[22] = e22; - r_.u16[23] = e23; - r_.u16[24] = e24; - r_.u16[25] = e25; - r_.u16[26] = e26; - r_.u16[27] = e27; - r_.u16[28] = e28; - r_.u16[29] = e29; - r_.u16[30] = e30; - r_.u16[31] = e31; - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_epu32 (uint32_t e15, uint32_t e14, uint32_t e13, uint32_t e12, uint32_t e11, uint32_t e10, uint32_t e9, uint32_t e8, - uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - simde__m512i_private r_; - - r_.u32[ 0] = e0; - r_.u32[ 1] = e1; - r_.u32[ 2] = e2; - r_.u32[ 3] = e3; - r_.u32[ 4] = e4; - r_.u32[ 5] = e5; - r_.u32[ 6] = e6; - r_.u32[ 7] = e7; - r_.u32[ 8] = e8; - r_.u32[ 9] = e9; - r_.u32[10] = e10; - r_.u32[11] = e11; - r_.u32[12] = e12; - r_.u32[13] = e13; - r_.u32[14] = e14; - r_.u32[15] = e15; - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_epu64 (uint64_t e7, uint64_t e6, uint64_t e5, uint64_t e4, uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { - simde__m512i_private r_; - - r_.u64[ 0] = e0; - r_.u64[ 1] = e1; - r_.u64[ 2] = e2; - r_.u64[ 3] = e3; - r_.u64[ 4] = e4; - r_.u64[ 5] = e5; - r_.u64[ 6] = e6; - r_.u64[ 7] = e7; - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set_epi8 (int8_t e63, int8_t e62, int8_t e61, int8_t e60, int8_t e59, int8_t e58, int8_t e57, int8_t e56, - int8_t e55, int8_t e54, int8_t e53, int8_t e52, int8_t e51, int8_t e50, int8_t e49, int8_t e48, - int8_t e47, int8_t e46, int8_t e45, int8_t e44, int8_t e43, int8_t e42, int8_t e41, int8_t e40, - int8_t e39, int8_t e38, int8_t e37, int8_t e36, int8_t e35, int8_t e34, int8_t e33, int8_t e32, - int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && (HEDLEY_GCC_VERSION_CHECK(10,0,0) || SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0)) - return _mm512_set_epi8( - e63, e62, e61, e60, e59, e58, e57, e56, - e55, e54, e53, e52, e51, e50, e49, e48, - e47, e46, e45, e44, e43, e42, e41, e40, - e39, e38, e37, e36, e35, e34, e33, e32, - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0 - ); - #else - simde__m512i_private r_; - - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - r_.i8[16] = e16; - r_.i8[17] = e17; - r_.i8[18] = e18; - r_.i8[19] = e19; - r_.i8[20] = e20; - r_.i8[21] = e21; - r_.i8[22] = e22; - r_.i8[23] = e23; - r_.i8[24] = e24; - r_.i8[25] = e25; - r_.i8[26] = e26; - r_.i8[27] = e27; - r_.i8[28] = e28; - r_.i8[29] = e29; - r_.i8[30] = e30; - r_.i8[31] = e31; - r_.i8[32] = e32; - r_.i8[33] = e33; - r_.i8[34] = e34; - r_.i8[35] = e35; - r_.i8[36] = e36; - 
r_.i8[37] = e37; - r_.i8[38] = e38; - r_.i8[39] = e39; - r_.i8[40] = e40; - r_.i8[41] = e41; - r_.i8[42] = e42; - r_.i8[43] = e43; - r_.i8[44] = e44; - r_.i8[45] = e45; - r_.i8[46] = e46; - r_.i8[47] = e47; - r_.i8[48] = e48; - r_.i8[49] = e49; - r_.i8[50] = e50; - r_.i8[51] = e51; - r_.i8[52] = e52; - r_.i8[53] = e53; - r_.i8[54] = e54; - r_.i8[55] = e55; - r_.i8[56] = e56; - r_.i8[57] = e57; - r_.i8[58] = e58; - r_.i8[59] = e59; - r_.i8[60] = e60; - r_.i8[61] = e61; - r_.i8[62] = e62; - r_.i8[63] = e63; - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_epi8 - #define _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_m128i (simde__m128i a, simde__m128i b, simde__m128i c, simde__m128i d) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_ALIGN_TO_64 simde__m128i v[] = { d, c, b, a }; - return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); - #else - simde__m512i_private r_; - - r_.m128i[0] = d; - r_.m128i[1] = c; - r_.m128i[2] = b; - r_.m128i[3] = a; - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_x_mm512_set_m256 (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_ALIGN_TO_64 simde__m256 v[] = { b, a }; - return simde_mm512_load_ps(HEDLEY_STATIC_CAST(__m512 *, HEDLEY_STATIC_CAST(void *, v))); - #else - simde__m512_private r_; - - r_.m256[0] = b; - r_.m256[1] = a; - - return simde__m512_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_m256i (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_ALIGN_TO_64 simde__m256i v[] = { b, a }; - return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); - #else - simde__m512i_private r_; - - r_.m256i[0] = b; - r_.m256i[1] = a; - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_x_mm512_set_m256d (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_ALIGN_TO_64 simde__m256d v[] = { b, a }; - return simde_mm512_load_pd(HEDLEY_STATIC_CAST(__m512d *, HEDLEY_STATIC_CAST(void *, v))); - #else - simde__m512d_private r_; - - r_.m256d[0] = b; - r_.m256d[1] = a; - - return simde__m512d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_set_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12, - simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8, - simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - simde__m512_private r_; - - r_.f32[ 0] = e0; - r_.f32[ 1] = e1; - r_.f32[ 2] = e2; - r_.f32[ 3] = e3; - r_.f32[ 4] = e4; - r_.f32[ 5] = e5; - 
r_.f32[ 6] = e6; - r_.f32[ 7] = e7; - r_.f32[ 8] = e8; - r_.f32[ 9] = e9; - r_.f32[10] = e10; - r_.f32[11] = e11; - r_.f32[12] = e12; - r_.f32[13] = e13; - r_.f32[14] = e14; - r_.f32[15] = e15; - - return simde__m512_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_ps - #define _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_set_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - simde__m512d_private r_; - - r_.f64[0] = e0; - r_.f64[1] = e1; - r_.f64[2] = e2; - r_.f64[3] = e3; - r_.f64[4] = e4; - r_.f64[5] = e5; - r_.f64[6] = e6; - r_.f64[7] = e7; - - return simde__m512d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_pd - #define _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_set_ph (simde_float16 e31, simde_float16 e30, simde_float16 e29, simde_float16 e28, simde_float16 e27, simde_float16 e26, simde_float16 e25, simde_float16 e24, - simde_float16 e23, simde_float16 e22, simde_float16 e21, simde_float16 e20, simde_float16 e19, simde_float16 e18, simde_float16 e17, simde_float16 e16, - simde_float16 e15, simde_float16 e14, simde_float16 e13, simde_float16 e12, simde_float16 e11, simde_float16 e10, simde_float16 e9, simde_float16 e8, - simde_float16 e7, simde_float16 e6, simde_float16 e5, simde_float16 e4, simde_float16 e3, simde_float16 e2, simde_float16 e1, simde_float16 e0) { - simde__m512h_private r_; - - r_.f16[0] = e0; - r_.f16[1] = e1; - r_.f16[2] = e2; - r_.f16[3] = e3; - r_.f16[4] = e4; - r_.f16[5] = e5; - r_.f16[6] = e6; - r_.f16[7] = e7; - r_.f16[8] = e8; - r_.f16[9] = e9; - r_.f16[10] = e10; - r_.f16[11] = e11; - r_.f16[12] = e12; - r_.f16[13] = e13; - r_.f16[14] = e14; - r_.f16[15] = e15; - r_.f16[16] = e16; - r_.f16[17] = e17; - r_.f16[18] = e18; - r_.f16[19] = e19; - r_.f16[20] = e20; - r_.f16[21] = e21; - r_.f16[22] = e22; - r_.f16[23] = e23; - r_.f16[24] = e24; - r_.f16[25] = e25; - r_.f16[26] = e26; - r_.f16[27] = e27; - r_.f16[28] = e28; - r_.f16[29] = e29; - r_.f16[30] = e30; - r_.f16[31] = e31; - - return simde__m512h_from_private(r_); -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_ph - #define _mm512_set_ph(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm512_set_ph(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SET_H) */ -/* :: End simde/x86/avx512/set.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_mov_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_mov_epi8(src, k, a); - #else - simde__m128i_private - src_ = simde__m128i_to_private(src), - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / 
sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_mov_epi8 - #define _mm_mask_mov_epi8(src, k, a) simde_mm_mask_mov_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_mov_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_mov_epi16(src, k, a); - #else - simde__m128i_private - src_ = simde__m128i_to_private(src), - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : src_.i16[i]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_mov_epi16 - #define _mm_mask_mov_epi16(src, k, a) simde_mm_mask_mov_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_mov_epi32 (simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_mov_epi32(src, k, a); - #else - simde__m128i_private - src_ = simde__m128i_to_private(src), - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_mov_epi32 - #define _mm_mask_mov_epi32(src, k, a) simde_mm_mask_mov_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_mov_epi64 (simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_mov_epi64(src, k, a); - #else - simde__m128i_private - src_ = simde__m128i_to_private(src), - a_ = simde__m128i_to_private(a), - r_; - - /* N.B. CM: No fallbacks as there are only two elements */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_mov_epi64 - #define _mm_mask_mov_epi64(src, k, a) simde_mm_mask_mov_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mask_mov_pd(simde__m128d src, simde__mmask8 k, simde__m128d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_mov_pd(src, k, a); - #else - return simde_mm_castsi128_pd(simde_mm_mask_mov_epi64(simde_mm_castpd_si128(src), k, simde_mm_castpd_si128(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_mov_pd - #define _mm_mask_mov_pd(src, k, a) simde_mm_mask_mov_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_mov_ps (simde__m128 src, simde__mmask8 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_mov_ps(src, k, a); - #else - return simde_mm_castsi128_ps(simde_mm_mask_mov_epi32(simde_mm_castps_si128(src), k, simde_mm_castps_si128(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_mov_ps - #define _mm_mask_mov_ps(src, k, a) simde_mm_mask_mov_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_mov_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_epi8(src, k, a); - #else - simde__m256i_private - r_, - src_ = simde__m256i_to_private(src), - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i[0] = simde_mm_mask_mov_epi8(src_.m128i[0], HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]); - r_.m128i[1] = simde_mm_mask_mov_epi8(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_epi8 - #define _mm256_mask_mov_epi8(src, k, a) simde_mm256_mask_mov_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_mov_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_epi16(src, k, a); - #else - simde__m256i_private - src_ = simde__m256i_to_private(src), - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_mask_mov_epi16(src_.m128i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); - r_.m128i[1] = simde_mm_mask_mov_epi16(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : src_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_epi16 - #define _mm256_mask_mov_epi16(src, k, a) simde_mm256_mask_mov_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_mov_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_epi32(src, k, a); - #else - simde__m256i_private - src_ = simde__m256i_to_private(src), - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_mask_mov_epi32(src_.m128i[0], k , a_.m128i[0]); - r_.m128i[1] = simde_mm_mask_mov_epi32(src_.m128i[1], k >> 4, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_epi32 - #define _mm256_mask_mov_epi32(src, k, a) simde_mm256_mask_mov_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_mov_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_epi64(src, k, a); - #else - simde__m256i_private - src_ = simde__m256i_to_private(src), - a_ = simde__m256i_to_private(a), - r_; - - /* N.B. CM: This fallback may not be faster as there are only four elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_mask_mov_epi64(src_.m128i[0], k , a_.m128i[0]); - r_.m128i[1] = simde_mm_mask_mov_epi64(src_.m128i[1], k >> 2, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_epi64 - #define _mm256_mask_mov_epi64(src, k, a) simde_mm256_mask_mov_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_mov_pd (simde__m256d src, simde__mmask8 k, simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_pd(src, k, a); - #else - return simde_mm256_castsi256_pd(simde_mm256_mask_mov_epi64(simde_mm256_castpd_si256(src), k, simde_mm256_castpd_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_pd - #define _mm256_mask_mov_pd(src, k, a) simde_mm256_mask_mov_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_mov_ps (simde__m256 src, simde__mmask8 k, simde__m256 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_ps(src, k, a); - #else - return simde_mm256_castsi256_ps(simde_mm256_mask_mov_epi32(simde_mm256_castps_si256(src), k, simde_mm256_castps_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_ps - #define _mm256_mask_mov_ps(src, k, a) simde_mm256_mask_mov_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_mov_epi8(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi8(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi8(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi8 - #define _mm512_mask_mov_epi8(src, k, a) simde_mm512_mask_mov_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_mov_epi16(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi16(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi16(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : src_.i16[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi16 - #define _mm512_mask_mov_epi16(src, k, a) simde_mm512_mask_mov_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_epi32(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi32(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi32(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi32 - #define _mm512_mask_mov_epi32(src, k, a) simde_mm512_mask_mov_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_epi64(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi64(src_.m256i[0], k , a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi64(src_.m256i[1], k >> 4, a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi64 - #define _mm512_mask_mov_epi64(src, k, a) simde_mm512_mask_mov_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_mov_pd (simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_pd(src, k, a); - #else - return simde_mm512_castsi512_pd(simde_mm512_mask_mov_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_pd - #define _mm512_mask_mov_pd(src, k, a) simde_mm512_mask_mov_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_mov_ps (simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_ps(src, k, a); - #else - return simde_mm512_castsi512_ps(simde_mm512_mask_mov_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_ps - #define _mm512_mask_mov_ps(src, k, a) simde_mm512_mask_mov_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_x_mm512_mask_mov_ph (simde__m512h src, simde__mmask32 k, simde__m512h a) { - return simde_mm512_castsi512_ph(simde_mm512_mask_mov_epi16(simde_mm512_castph_si512(src), k, simde_mm512_castph_si512(a))); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi8 (simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi8(k, a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi8 - #define _mm_maskz_mov_epi8(k, a) simde_mm_maskz_mov_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi16 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi16(k, a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi16 - #define _mm_maskz_mov_epi16(k, a) simde_mm_maskz_mov_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi32 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi32(k, a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? 
a_.i32[i] : INT32_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi32 - #define _mm_maskz_mov_epi32(k, a) simde_mm_maskz_mov_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi64 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi64(k, a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - /* N.B. CM: No fallbacks as there are only two elements */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : INT64_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi64 - #define _mm_maskz_mov_epi64(k, a) simde_mm_maskz_mov_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_maskz_mov_pd (simde__mmask8 k, simde__m128d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_pd(k, a); - #else - return simde_mm_castsi128_pd(simde_mm_maskz_mov_epi64(k, simde_mm_castpd_si128(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_pd - #define _mm_maskz_mov_pd(k, a) simde_mm_maskz_mov_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskz_mov_ps (simde__mmask8 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_ps(k, a); - #else - return simde_mm_castsi128_ps(simde_mm_maskz_mov_epi32(k, simde_mm_castps_si128(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_ps - #define _mm_maskz_mov_ps(k, a) simde_mm_maskz_mov_ps(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi8 (simde__mmask32 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi8(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? 
a_.i8[i] : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi8 - #define _mm256_maskz_mov_epi8(k, a) simde_mm256_maskz_mov_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi16 (simde__mmask16 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi16(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi16 - #define _mm256_maskz_mov_epi16(k, a) simde_mm256_maskz_mov_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi32 (simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi32(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi32(k , a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi32(k >> 4, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi32 - #define _mm256_maskz_mov_epi32(k, a) simde_mm256_maskz_mov_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi64 (simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi64(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - /* N.B. CM: This fallback may not be faster as there are only four elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi64(k , a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi64(k >> 2, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi64 - #define _mm256_maskz_mov_epi64(k, a) simde_mm256_maskz_mov_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskz_mov_pd (simde__mmask8 k, simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_pd(k, a); - #else - return simde_mm256_castsi256_pd(simde_mm256_maskz_mov_epi64(k, simde_mm256_castpd_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_pd - #define _mm256_maskz_mov_pd(k, a) simde_mm256_maskz_mov_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskz_mov_ps (simde__mmask8 k, simde__m256 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_ps(k, a); - #else - return simde_mm256_castsi256_ps(simde_mm256_maskz_mov_epi32(k, simde_mm256_castps_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_ps - #define _mm256_maskz_mov_ps(k, a) simde_mm256_maskz_mov_ps(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi8 (simde__mmask64 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_mov_epi8(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); - #else - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi8 - #define _mm512_maskz_mov_epi8(k, a) simde_mm512_maskz_mov_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi16 (simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_mov_epi16(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : INT16_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi16 - #define _mm512_maskz_mov_epi16(k, a) simde_mm512_maskz_mov_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi32 (simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_epi32(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi32 - #define _mm512_maskz_mov_epi32(k, a) simde_mm512_maskz_mov_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi64 (simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_epi64(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi64(k , a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi64(k >> 4, a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi64 - #define _mm512_maskz_mov_epi64(k, a) simde_mm512_maskz_mov_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_mov_pd (simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_pd(k, a); - #else - return simde_mm512_castsi512_pd(simde_mm512_maskz_mov_epi64(k, simde_mm512_castpd_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_pd - #define _mm512_maskz_mov_pd(k, a) simde_mm512_maskz_mov_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_mov_ps (simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_ps(k, a); - #else - return simde_mm512_castsi512_ps(simde_mm512_maskz_mov_epi32(k, simde_mm512_castps_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_ps - #define _mm512_maskz_mov_ps(k, a) simde_mm512_maskz_mov_ps(k, a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MOV_H) */ -/* :: End simde/x86/avx512/mov.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_add_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_add_epi8(src, k, a, b); - #else - return simde_mm_mask_mov_epi8(src, k, simde_mm_add_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_add_epi8 - #define _mm_mask_add_epi8(src, k, a, b) simde_mm_mask_add_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_add_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_maskz_add_epi8(k, a, b); - #else - return simde_mm_maskz_mov_epi8(k, simde_mm_add_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_add_epi8 - #define _mm_maskz_add_epi8(k, a, b) simde_mm_maskz_add_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_add_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_add_epi16(src, k, a, b); - #else - return simde_mm_mask_mov_epi16(src, k, simde_mm_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_add_epi16 - #define _mm_mask_add_epi16(src, k, a, b) simde_mm_mask_add_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_add_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_maskz_add_epi16(k, a, b); - #else - return simde_mm_maskz_mov_epi16(k, simde_mm_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_add_epi16 - #define _mm_maskz_add_epi16(k, a, b) simde_mm_maskz_add_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_add_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if 
defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_add_epi32(src, k, a, b); - #else - return simde_mm_mask_mov_epi32(src, k, simde_mm_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_add_epi32 - #define _mm_mask_add_epi32(src, k, a, b) simde_mm_mask_add_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_add_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_add_epi32(k, a, b); - #else - return simde_mm_maskz_mov_epi32(k, simde_mm_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_add_epi32 - #define _mm_maskz_add_epi32(k, a, b) simde_mm_maskz_add_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_add_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_add_epi64(src, k, a, b); - #else - return simde_mm_mask_mov_epi64(src, k, simde_mm_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_add_epi64 - #define _mm_mask_add_epi64(src, k, a, b) simde_mm_mask_add_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_add_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_add_epi64(k, a, b); - #else - return simde_mm_maskz_mov_epi64(k, simde_mm_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_add_epi64 - #define _mm_maskz_add_epi64(k, a, b) simde_mm_maskz_add_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_add_ss(simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - return _mm_mask_add_ss(src, k, a, b); - #elif 1 - simde__m128_private - src_ = simde__m128_to_private(src), - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - r_ = simde__m128_to_private(a); - - r_.f32[0] = (k & 1) ? (a_.f32[0] + b_.f32[0]) : src_.f32[0]; - - return simde__m128_from_private(r_); - #else - return simde_mm_move_ss(a, simde_mm_mask_mov_ps(src, k, simde_mm_add_ps(a, b))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_add_ss - #define _mm_mask_add_ss(src, k, a, b) simde_mm_mask_add_ss(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskz_add_ss(simde__mmask8 k, simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - return _mm_maskz_add_ss(k, a, b); - #elif 1 - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - r_ = simde__m128_to_private(a); - - r_.f32[0] = (k & 1) ? 
(a_.f32[0] + b_.f32[0]) : 0.0f; - - return simde__m128_from_private(r_); - #else - return simde_mm_move_ss(a, simde_mm_maskz_mov_ps(k, simde_mm_add_ps(a, b))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_add_ss - #define _mm_maskz_add_ss(k, a, b) simde_mm_maskz_add_ss(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_add_epi16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_add_epi16(src, k, a, b); - #else - return simde_mm256_mask_mov_epi16(src, k, simde_mm256_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_add_epi16 - #define _mm256_mask_add_epi16(src, k, a, b) simde_mm256_mask_add_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_add_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_maskz_add_epi16(k, a, b); - #else - return simde_mm256_maskz_mov_epi16(k, simde_mm256_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_add_epi16 - #define _mm256_maskz_add_epi16(k, a, b) simde_mm256_maskz_add_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_add_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_add_epi32(src, k, a, b); - #else - return simde_mm256_mask_mov_epi32(src, k, simde_mm256_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_add_epi32 - #define _mm256_mask_add_epi32(src, k, a, b) simde_mm256_mask_add_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_add_epi32(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_add_epi32(k, a, b); - #else - return simde_mm256_maskz_mov_epi32(k, simde_mm256_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_add_epi32 - #define _mm256_maskz_add_epi32(k, a, b) simde_mm256_maskz_add_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_add_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_add_epi64(src, k, a, b); - #else - return simde_mm256_mask_mov_epi64(src, k, simde_mm256_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_add_epi64 - #define _mm256_mask_add_epi64(src, k, a, b) simde_mm256_mask_add_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_add_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_add_epi64(k, a, b); - #else - return simde_mm256_maskz_mov_epi64(k, simde_mm256_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_add_epi64 - #define _mm256_maskz_add_epi64(k, a, b) simde_mm256_maskz_add_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_add_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_add_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = 
simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi8(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_epi8 - #define _mm512_add_epi8(a, b) simde_mm512_add_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_add_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_add_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_add_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_epi8 - #define _mm512_mask_add_epi8(src, k, a, b) simde_mm512_mask_add_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_add_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_add_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_add_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_epi8 - #define _mm512_maskz_add_epi8(k, a, b) simde_mm512_maskz_add_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_add_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_add_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi16(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_epi16 - #define _mm512_add_epi16(a, b) simde_mm512_add_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_add_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_add_epi16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_epi16 - #define _mm512_mask_add_epi16(src, k, a, b) simde_mm512_mask_add_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_add_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_add_epi16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_epi16 - #define _mm512_maskz_add_epi16(k, a, b) simde_mm512_maskz_add_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_add_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - 
SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_epi32 - #define _mm512_add_epi32(a, b) simde_mm512_add_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_add_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_add_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_epi32 - #define _mm512_mask_add_epi32(src, k, a, b) simde_mm512_mask_add_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_add_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_add_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_epi32 - #define _mm512_maskz_add_epi32(k, a, b) simde_mm512_maskz_add_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_add_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) - r_.i64 = a_.i64 + b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_epi64 - #define _mm512_add_epi64(a, b) simde_mm512_add_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_add_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_add_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_epi64 - #define _mm512_mask_add_epi64(src, k, a, b) simde_mm512_mask_add_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_add_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_add_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_epi64 - #define _mm512_maskz_add_epi64(k, a, b) simde_mm512_maskz_add_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_add_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_add_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_add_ps(a_.m256[i], b_.m256[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_ps - #define _mm512_add_ps(a, b) simde_mm512_add_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_add_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_add_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_add_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_ps - #define _mm512_mask_add_ps(src, k, a, b) simde_mm512_mask_add_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_add_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_add_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_add_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_ps - #define _mm512_maskz_add_ps(k, a, b) simde_mm512_maskz_add_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_add_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_add_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_add_pd(a_.m256d[i], b_.m256d[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_pd - #define _mm512_add_pd(a, b) simde_mm512_add_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_add_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_add_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_add_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_pd - #define _mm512_mask_add_pd(src, k, a, b) simde_mm512_mask_add_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_add_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_add_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_add_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_pd - #define _mm512_maskz_add_pd(k, a, b) simde_mm512_maskz_add_pd(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_ADD_H) */ -/* :: End simde/x86/avx512/add.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/and.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to 
use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_AND_H) -#define SIMDE_X86_AVX512_AND_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_and_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_and_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_X86_AVX_NATIVE) - r_.m256d[0] = simde_mm256_and_pd(a_.m256d[0], b_.m256d[0]); - r_.m256d[1] = simde_mm256_and_pd(a_.m256d[1], b_.m256d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_pd - #define _mm512_and_pd(a, b) simde_mm512_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_and_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_and_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_X86_AVX_NATIVE) - r_.m256[0] = simde_mm256_and_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_and_ps(a_.m256[1], b_.m256[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_ps - #define _mm512_and_ps(a, b) simde_mm512_and_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_and_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_and_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_and_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_and_ps - #define _mm512_mask_and_ps(src, k, a, b) 
simde_mm512_mask_and_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_and_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_and_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_and_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_ps - #define _mm512_maskz_and_ps(k, a, b) simde_mm512_maskz_and_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_and_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_and_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_and_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_and_pd - #define _mm512_mask_and_pd(src, k, a, b) simde_mm512_mask_and_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_and_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_and_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_and_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_pd - #define _mm512_maskz_and_pd(k, a, b) simde_mm512_maskz_and_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_and_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_and_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_epi32 - #define _mm512_and_epi32(a, b) simde_mm512_and_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_and_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_and_epi32(src, k, v2, v3); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_and_epi32(v2, v3)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_and_epi32 - #define _mm512_mask_and_epi32(src, k, v2, v3) simde_mm512_mask_and_epi32(src, k, v2, v3) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_and_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_and_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_and_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_epi32 - #define _mm512_maskz_and_epi32(k, a, b) simde_mm512_maskz_and_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_and_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_and_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; 
i++) { - r_.i64[i] = a_.i64[i] & b_.i64[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_epi64 - #define _mm512_and_epi64(a, b) simde_mm512_and_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_and_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_and_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_and_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_and_epi64 - #define _mm512_mask_and_epi64(src, k, a, b) simde_mm512_mask_and_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_and_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_and_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_and_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_epi64 - #define _mm512_maskz_and_epi64(k, a, b) simde_mm512_maskz_and_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_and_si512 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_and_si512(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_and_si256(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_and_si256(a_.m256i[1], b_.m256i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_si512 - #define _mm512_and_si512(a, b) simde_mm512_and_si512(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_AND_H) */ -/* :: End simde/x86/avx512/and.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/broadcast.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_BROADCAST_H) -#define SIMDE_X86_AVX512_BROADCAST_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/set1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_SET1_H) -#define SIMDE_X86_AVX512_SET1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_epi8(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_epi8 - #define _mm512_set1_epi8(a) simde_mm512_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_set1_epi8(simde__m512i src, simde__mmask64 k, int8_t a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_set1_epi8(src, k, a); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_set1_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_set1_epi8 - #define _mm512_mask_set1_epi8(src, k, a) simde_mm512_mask_set1_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_set1_epi8(simde__mmask64 k, int8_t a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_set1_epi8(k, a); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_set1_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_set1_epi8 - #define _mm512_maskz_set1_epi8(k, a) simde_mm512_maskz_set1_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set1_epi16 (int16_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_epi16(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_epi16 - #define _mm512_set1_epi16(a) simde_mm512_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_set1_epi16(simde__m512i src, simde__mmask32 k, int16_t a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_set1_epi16(src, k, a); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_set1_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_set1_epi16 - #define _mm512_mask_set1_epi16(src, k, a) simde_mm512_mask_set1_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_set1_epi16(simde__mmask32 k, int16_t a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_set1_epi16(k, a); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_set1_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_set1_epi16 - #define _mm512_maskz_set1_epi16(k, a) simde_mm512_maskz_set1_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set1_epi32 (int32_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_epi32(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a; - } - - return 
simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_epi32 - #define _mm512_set1_epi32(a) simde_mm512_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_set1_epi32(simde__m512i src, simde__mmask16 k, int32_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_set1_epi32(src, k, a); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_set1_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_set1_epi32 - #define _mm512_mask_set1_epi32(src, k, a) simde_mm512_mask_set1_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_set1_epi32(simde__mmask16 k, int32_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_set1_epi32(k, a); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_set1_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_set1_epi32 - #define _mm512_maskz_set1_epi32(k, a) simde_mm512_maskz_set1_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set1_epi64 (int64_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_epi64(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_epi64 - #define _mm512_set1_epi64(a) simde_mm512_set1_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_set1_epi64(simde__m512i src, simde__mmask8 k, int64_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_set1_epi64(src, k, a); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_set1_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_set1_epi64 - #define _mm512_mask_set1_epi64(src, k, a) simde_mm512_mask_set1_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_set1_epi64(simde__mmask8 k, int64_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_set1_epi64(k, a); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_set1_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_set1_epi64 - #define _mm512_maskz_set1_epi64(k, a) simde_mm512_maskz_set1_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set1_epu8 (uint8_t a) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a; - } - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set1_epu16 (uint16_t a) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a; - } - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set1_epu32 (uint32_t a) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a; - } - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set1_epu64 (uint64_t a) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a; - } - - 
return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_set1_ps (simde_float32 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_ps(a); - #else - simde__m512_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a; - } - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_ps - #define _mm512_set1_ps(a) simde_mm512_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_pd(a); - #else - simde__m512d_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a; - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_pd - #define _mm512_set1_pd(a) simde_mm512_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_set1_ph (simde_float16 a) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_set1_ph(a); - #else - simde__m512h_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.f16[i] = a; - } - - return simde__m512h_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_ph - #define _mm512_set1_ph(a) simde_mm512_set1_ph(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SET1_H) */ -/* :: End simde/x86/avx512/set1.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcast_f32x2 (simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm256_broadcast_f32x2(a); - #else - simde__m256_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 0, 1, 0, 1, 0, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[0]; - r_.f32[i + 1] = a_.f32[1]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_f32x2 - #define _mm256_broadcast_f32x2(a) simde_mm256_broadcast_f32x2(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_broadcast_f32x2(simde__m256 src, simde__mmask8 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm256_mask_broadcast_f32x2(src, k, a); - #else - return simde_mm256_mask_mov_ps(src, k, simde_mm256_broadcast_f32x2(a)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_broadcast_f32x2 - #define _mm256_mask_broadcast_f32x2(src, k, a) simde_mm256_mask_broadcast_f32x2(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskz_broadcast_f32x2(simde__mmask8 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm256_maskz_broadcast_f32x2(k, a); - #else - return simde_mm256_maskz_mov_ps(k, simde_mm256_broadcast_f32x2(a)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_broadcast_f32x2 - #define 
_mm256_maskz_broadcast_f32x2(k, a) simde_mm256_maskz_broadcast_f32x2(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_broadcast_f32x2 (simde__m128 a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_broadcast_f32x2(a); - #else - simde__m512_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { - r_.f32[ i ] = a_.f32[0]; - r_.f32[i + 1] = a_.f32[1]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcast_f32x2 - #define _mm512_broadcast_f32x2(a) simde_mm512_broadcast_f32x2(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_broadcast_f32x2(simde__m512 src, simde__mmask16 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_broadcast_f32x2(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_broadcast_f32x2(a)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_broadcast_f32x2 - #define _mm512_mask_broadcast_f32x2(src, k, a) simde_mm512_mask_broadcast_f32x2(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_broadcast_f32x2(simde__mmask16 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_broadcast_f32x2(k, a); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_broadcast_f32x2(a)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_broadcast_f32x2 - #define _mm512_maskz_broadcast_f32x2(k, a) simde_mm512_maskz_broadcast_f32x2(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_broadcast_f32x8 (simde__m256 a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_broadcast_f32x8(a); - #else - simde__m512_private r_; - simde__m256_private a_ = simde__m256_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=8) { - r_.f32[ i ] = a_.f32[0]; - r_.f32[i + 1] = a_.f32[1]; - r_.f32[i + 2] = a_.f32[2]; - r_.f32[i + 3] = a_.f32[3]; - r_.f32[i + 4] = a_.f32[4]; - r_.f32[i + 5] = a_.f32[5]; - r_.f32[i + 6] = a_.f32[6]; - r_.f32[i + 7] = a_.f32[7]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcast_f32x8 - #define _mm512_broadcast_f32x8(a) simde_mm512_broadcast_f32x8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_broadcast_f32x8(simde__m512 src, simde__mmask16 k, simde__m256 a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_broadcast_f32x8(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_broadcast_f32x8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_broadcast_f32x8 - #define _mm512_mask_broadcast_f32x8(src, k, a) simde_mm512_mask_broadcast_f32x8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_broadcast_f32x8(simde__mmask16 k, simde__m256 a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return 
_mm512_maskz_broadcast_f32x8(k, a); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_broadcast_f32x8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_broadcast_f32x8 - #define _mm512_maskz_broadcast_f32x8(k, a) simde_mm512_maskz_broadcast_f32x8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_broadcast_f64x2 (simde__m128d a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_broadcast_f64x2(a); - #else - simde__m512d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) - r_.f64 = __builtin_shufflevector(a_.f64, a_.f64, 0, 1, 0, 1, 0, 1, 0, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[0]; - r_.f64[i + 1] = a_.f64[1]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcast_f64x2 - #define _mm512_broadcast_f64x2(a) simde_mm512_broadcast_f64x2(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_broadcast_f64x2(simde__m512d src, simde__mmask8 k, simde__m128d a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_broadcast_f64x2(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_broadcast_f64x2(a)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_broadcast_f64x2 - #define _mm512_mask_broadcast_f64x2(src, k, a) simde_mm512_mask_broadcast_f64x2(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_broadcast_f64x2(simde__mmask8 k, simde__m128d a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_broadcast_f64x2(k, a); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_broadcast_f64x2(a)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_broadcast_f64x2 - #define _mm512_maskz_broadcast_f64x2(k, a) simde_mm512_maskz_broadcast_f64x2(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcast_f32x4 (simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_broadcast_f32x4(a); - #else - simde__m256_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128_private[0] = a_; - r_.m128_private[1] = a_; - #elif defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 2, 3, 0, 1, 2, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 4) { - r_.f32[ i ] = a_.f32[0]; - r_.f32[i + 1] = a_.f32[1]; - r_.f32[i + 2] = a_.f32[2]; - r_.f32[i + 3] = a_.f32[3]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_f32x4 - #define _mm256_broadcast_f32x4(a) simde_mm256_broadcast_f32x4(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_broadcast_f32x4(simde__m256 src, simde__mmask8 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_broadcast_f32x4(src, k, a); - #else - return simde_mm256_mask_mov_ps(src, k, simde_mm256_broadcast_f32x4(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_broadcast_f32x4 - #define _mm256_mask_broadcast_f32x4(src, k, a) 
simde_mm256_mask_broadcast_f32x4(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskz_broadcast_f32x4(simde__mmask8 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_broadcast_f32x4(k, a); - #else - return simde_mm256_maskz_mov_ps(k, simde_mm256_broadcast_f32x4(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_broadcast_f32x4 - #define _mm256_maskz_broadcast_f32x4(k, a) simde_mm256_maskz_broadcast_f32x4(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcast_f64x2 (simde__m128d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm256_broadcast_f64x2(a); - #else - simde__m256d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - /* I don't have a bug # for this, but when compiled with clang-10 without optimization on aarch64 - * the __builtin_shufflevector version doesn't work correctly. clang 9 and 11 aren't a problem */ - #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION < 100000 || SIMDE_DETECT_CLANG_VERSION > 100000)) - r_.f64 = __builtin_shufflevector(a_.f64, a_.f64, 0, 1, 0, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[0]; - r_.f64[i + 1] = a_.f64[1]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_f64x2 - #define _mm256_broadcast_f64x2(a) simde_mm256_broadcast_f64x2(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_broadcast_f64x2(simde__m256d src, simde__mmask8 k, simde__m128d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm256_mask_broadcast_f64x2(src, k, a); - #else - return simde_mm256_mask_mov_pd(src, k, simde_mm256_broadcast_f64x2(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_broadcast_f64x2 - #define _mm256_mask_broadcast_f64x2(src, k, a) simde_mm256_mask_broadcast_f64x2(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskz_broadcast_f64x2(simde__mmask8 k, simde__m128d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm256_maskz_broadcast_f64x2(k, a); - #else - return simde_mm256_maskz_mov_pd(k, simde_mm256_broadcast_f64x2(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_broadcast_f64x2 - #define _mm256_maskz_broadcast_f64x2(k, a) simde_mm256_maskz_broadcast_f64x2(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_broadcast_f32x4 (simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcast_f32x4(a); - #else - simde__m512_private r_; - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256[1] = r_.m256[0] = simde_mm256_castsi256_ps(simde_mm256_broadcastsi128_si256(simde_mm_castps_si128(a))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = a; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcast_f32x4 - #define _mm512_broadcast_f32x4(a) simde_mm512_broadcast_f32x4(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_broadcast_f32x4(simde__m512 src, simde__mmask16 k, simde__m128 a) { - 
#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcast_f32x4(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_broadcast_f32x4(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_broadcast_f32x4 - #define _mm512_mask_broadcast_f32x4(src, k, a) simde_mm512_mask_broadcast_f32x4(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_broadcast_f32x4(simde__mmask16 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcast_f32x4(k, a); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_broadcast_f32x4(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_broadcast_f32x4 - #define _mm512_maskz_broadcast_f32x4(k, a) simde_mm512_maskz_broadcast_f32x4(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_broadcast_f64x4 (simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcast_f64x4(a); - #else - simde__m512d_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = a; - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcast_f64x4 - #define _mm512_broadcast_f64x4(a) simde_mm512_broadcast_f64x4(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_broadcast_f64x4(simde__m512d src, simde__mmask8 k, simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcast_f64x4(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_broadcast_f64x4(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_broadcast_f64x4 - #define _mm512_mask_broadcast_f64x4(src, k, a) simde_mm512_mask_broadcast_f64x4(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_broadcast_f64x4(simde__mmask8 k, simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcast_f64x4(k, a); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_broadcast_f64x4(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_broadcast_f64x4 - #define _mm512_maskz_broadcast_f64x4(k, a) simde_mm512_maskz_broadcast_f64x4(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_broadcast_i32x4 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcast_i32x4(a); - #else - simde__m512i_private r_; - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[1] = r_.m256i[0] = simde_mm256_broadcastsi128_si256(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[3] = r_.m128i[2] = r_.m128i[1] = r_.m128i[0] = a; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = a; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcast_i32x4 - #define _mm512_broadcast_i32x4(a) simde_mm512_broadcast_i32x4(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_broadcast_i32x4(simde__m512i src, simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcast_i32x4(src, k, a); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_broadcast_i32x4(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_broadcast_i32x4 - #define 
_mm512_mask_broadcast_i32x4(src, k, a) simde_mm512_mask_broadcast_i32x4(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_broadcast_i32x4(simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcast_i32x4(k, a); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_broadcast_i32x4(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_broadcast_i32x4 - #define _mm512_maskz_broadcast_i32x4(k, a) simde_mm512_maskz_broadcast_i32x4(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_broadcast_i64x4 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcast_i64x4(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = a; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcast_i64x4 - #define _mm512_broadcast_i64x4(a) simde_mm512_broadcast_i64x4(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_broadcast_i64x4(simde__m512i src, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcast_i64x4(src, k, a); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_broadcast_i64x4(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_broadcast_i64x4 - #define _mm512_mask_broadcast_i64x4(src, k, a) simde_mm512_mask_broadcast_i64x4(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_broadcast_i64x4(simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcast_i64x4(k, a); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_broadcast_i64x4(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_broadcast_i64x4 - #define _mm512_maskz_broadcast_i64x4(k, a) simde_mm512_maskz_broadcast_i64x4(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_broadcastd_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcastd_epi32(a); - #else - simde__m512i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[0]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcastd_epi32 - #define _mm512_broadcastd_epi32(a) simde_mm512_broadcastd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_broadcastd_epi32(simde__m512i src, simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcastd_epi32(src, k, a); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_broadcastd_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_broadcastd_epi32 - #define _mm512_mask_broadcastd_epi32(src, k, a) simde_mm512_mask_broadcastd_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_broadcastd_epi32(simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcastd_epi32(k, a); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_broadcastd_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - 
#undef _mm512_maskz_broadcastd_epi32 - #define _mm512_maskz_broadcastd_epi32(k, a) simde_mm512_maskz_broadcastd_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_broadcastq_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcastq_epi64(a); - #else - simde__m512i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[0]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcastq_epi64 - #define _mm512_broadcastq_epi64(a) simde_mm512_broadcastq_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_broadcastq_epi64(simde__m512i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcastq_epi64(src, k, a); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_broadcastq_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_broadcastq_epi64 - #define _mm512_mask_broadcastq_epi64(src, k, a) simde_mm512_mask_broadcastq_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_broadcastq_epi64(simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcastq_epi64(k, a); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_broadcastq_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_broadcastq_epi64 - #define _mm512_maskz_broadcastq_epi64(k, a) simde_mm512_maskz_broadcastq_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_broadcastss_ps (simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcastss_ps(a); - #else - simde__m512_private r_; - simde__m128_private a_= simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcastss_ps - #define _mm512_broadcastss_ps(a) simde_mm512_broadcastss_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_broadcastss_ps(simde__m512 src, simde__mmask16 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcastss_ps(src, k, a); - #else - simde__m512_private - src_ = simde__m512_to_private(src), - r_; - simde__m128_private - a_ = simde__m128_to_private(a); - - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((k >> i) & 1) ? a_.f32[0] : src_.f32[i]; - } - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_broadcastss_ps - #define _mm512_mask_broadcastss_ps(src, k, a) simde_mm512_mask_broadcastss_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_broadcastss_ps(simde__mmask16 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcastss_ps(k, a); - #else - simde__m512_private - r_; - simde__m128_private - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((k >> i) & 1) ? 
a_.f32[0] : INT32_C(0); - } - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_broadcastss_ps - #define _mm512_maskz_broadcastss_ps(k, a) simde_mm512_maskz_broadcastss_ps(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_broadcastsd_pd (simde__m128d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcastsd_pd(a); - #else - simde__m512d_private r_; - simde__m128d_private a_= simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[0]; - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcastsd_pd - #define _mm512_broadcastsd_pd(a) simde_mm512_broadcastsd_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_broadcastsd_pd(simde__m512d src, simde__mmask8 k, simde__m128d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcastsd_pd(src, k, a); - #else - simde__m512d_private - src_ = simde__m512d_to_private(src), - r_; - simde__m128d_private - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((k >> i) & 1) ? a_.f64[0] : src_.f64[i]; - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_broadcastsd_pd - #define _mm512_mask_broadcastsd_pd(src, k, a) simde_mm512_mask_broadcastsd_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_broadcastsd_pd(simde__mmask8 k, simde__m128d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcastsd_pd(k, a); - #else - simde__m512d_private - r_; - simde__m128d_private - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((k >> i) & 1) ? 
a_.f64[0] : INT64_C(0); - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_broadcastsd_pd - #define _mm512_maskz_broadcastsd_pd(k, a) simde_mm512_maskz_broadcastsd_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_broadcastb_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_broadcastb_epi8(a); - #else - simde__m128i_private a_= simde__m128i_to_private(a); - return simde_mm512_set1_epi8(a_.i8[0]); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcastb_epi8 - #define _mm512_broadcastb_epi8(a) simde_mm512_broadcastb_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_broadcastb_epi8 (simde__m512i src, simde__mmask64 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_broadcastb_epi8(src, k, a); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_broadcastb_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_broadcastb_epi8 - #define _mm512_mask_broadcastb_epi8(src, k, a) simde_mm512_mask_broadcastb_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_broadcastb_epi8 (simde__mmask64 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_broadcastb_epi8(k, a); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_broadcastb_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_broadcastb_epi8 - #define _mm512_maskz_broadcastb_epi8(k, a) simde_mm512_maskz_broadcastb_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_broadcastw_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_broadcastw_epi16(a); - #else - simde__m128i_private a_= simde__m128i_to_private(a); - return simde_mm512_set1_epi16(a_.i16[0]); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcastw_epi16 - #define _mm512_broadcastw_epi16(a) simde_mm512_broadcastw_epi16(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_BROADCAST_H) */ -/* :: End simde/x86/avx512/broadcast.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/cmpeq.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020-2021 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_CMPEQ_H) -#define SIMDE_X86_AVX512_CMPEQ_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/mov_mask.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_MOV_MASK_H) -#define SIMDE_X86_AVX512_MOV_MASK_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_movepi8_mask (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_movepi8_mask(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movemask_epi8(a)); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__mmask16 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] < 0) ? 
(UINT64_C(1) << i) : 0; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_movepi8_mask - #define _mm_movepi8_mask(a) simde_mm_movepi8_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_movepi16_mask (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_movepi16_mask(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* There is no 32-bit _mm_movemask_* function, so we use - * _mm_movemask_epi8 then extract the odd bits. */ - uint_fast16_t r = HEDLEY_STATIC_CAST(uint_fast16_t, simde_mm_movemask_epi8(a)); - r = ( (r >> 1)) & UINT32_C(0x5555); - r = (r | (r >> 1)) & UINT32_C(0x3333); - r = (r | (r >> 2)) & UINT32_C(0x0f0f); - r = (r | (r >> 4)) & UINT32_C(0x00ff); - return HEDLEY_STATIC_CAST(simde__mmask8, r); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__mmask8 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_movepi16_mask - #define _mm_movepi16_mask(a) simde_mm_movepi16_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_movepi32_mask (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm_movepi32_mask(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movemask_ps(simde_mm_castsi128_ps(a))); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__mmask8 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r |= (a_.i32[i] < 0) ? (UINT32_C(1) << i) : 0; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_movepi32_mask - #define _mm_movepi32_mask(a) simde_mm_movepi32_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_movepi64_mask (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm_movepi64_mask(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movemask_pd(simde_mm_castsi128_pd(a))); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__mmask8 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r |= (a_.i64[i] < 0) ? (UINT32_C(1) << i) : 0; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_movepi64_mask - #define _mm_movepi64_mask(a) simde_mm_movepi64_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_movepi8_mask (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_movepi8_mask(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__mmask32 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask32, simde_mm_movepi8_mask(a_.m128i[i])) << (i * 16); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] < 0) ? 
(UINT64_C(1) << i) : 0; - } - #endif - - return HEDLEY_STATIC_CAST(simde__mmask32, r); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_movepi8_mask - #define _mm256_movepi8_mask(a) simde_mm256_movepi8_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_movepi16_mask (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_movepi16_mask(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__mmask16 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movepi16_mask(a_.m128i[i])) << (i * 8); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_movepi16_mask - #define _mm256_movepi16_mask(a) simde_mm256_movepi16_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_movepi32_mask (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm256_movepi32_mask(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__mmask8 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movepi32_mask(a_.m128i[i])) << (i * 4); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r |= (a_.i32[i] < 0) ? (UINT32_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm256_movepi32_mask - #define _mm256_movepi32_mask(a) simde_mm256_movepi32_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_movepi64_mask (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm256_movepi64_mask(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__mmask8 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movepi64_mask(a_.m128i[i])) << (i * 2); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r |= (a_.i64[i] < 0) ? (UINT32_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm256_movepi64_mask - #define _mm256_movepi64_mask(a) simde_mm256_movepi64_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_movepi8_mask (simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_movepi8_mask(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - simde__mmask64 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask64, simde_mm256_movepi8_mask(a_.m256i[i])) << (i * 32); - } - #else - r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] < 0) ? 
(UINT64_C(1) << i) : 0; - } - #endif - - return HEDLEY_STATIC_CAST(simde__mmask64, r); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_movepi8_mask - #define _mm512_movepi8_mask(a) simde_mm512_movepi8_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_movepi16_mask (simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_movepi16_mask(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - simde__mmask32 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask32, simde_mm256_movepi16_mask(a_.m256i[i])) << (i * 16); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_movepi16_mask - #define _mm512_movepi16_mask(a) simde_mm512_movepi16_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_movepi32_mask (simde__m512i a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_movepi32_mask(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - simde__mmask16 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm256_movepi32_mask(a_.m256i[i])) << (i * 8); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r |= (a_.i32[i] < 0) ? (UINT32_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_movepi32_mask - #define _mm512_movepi32_mask(a) simde_mm512_movepi32_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_movepi64_mask (simde__m512i a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_movepi64_mask(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - simde__mmask8 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r |= simde_mm256_movepi64_mask(a_.m256i[i]) << (i * 4); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r |= (a_.i64[i] < 0) ? 
(UINT32_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_movepi64_mask - #define _mm512_movepi64_mask(a) simde_mm512_movepi64_mask(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MOV_MASK_H) */ -/* :: End simde/x86/avx512/mov_mask.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/cmp.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020-2021 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_CMP_H) -#define SIMDE_X86_AVX512_CMP_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/setzero.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_SETZERO_H) -#define SIMDE_X86_AVX512_SETZERO_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_setzero_si512(void) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_setzero_si512(); - #else - simde__m512i r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#define simde_mm512_setzero_epi32() simde_mm512_setzero_si512() -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_si512 - #define _mm512_setzero_si512() simde_mm512_setzero_si512() - #undef _mm512_setzero_epi32 - #define _mm512_setzero_epi32() simde_mm512_setzero_si512() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_setzero_ps(void) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_setzero_ps(); - #else - return simde_mm512_castsi512_ps(simde_mm512_setzero_si512()); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_ps - #define _mm512_setzero_ps() simde_mm512_setzero_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_setzero_pd(void) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_setzero_pd(); - #else - return simde_mm512_castsi512_pd(simde_mm512_setzero_si512()); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_pd - #define _mm512_setzero_pd() simde_mm512_setzero_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_setzero_ph(void) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_setzero_ph(); - #else - return simde_mm512_castsi512_ph(simde_mm512_setzero_si512()); - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_ph - #define _mm512_setzero_ph() simde_mm512_setzero_ph() -#endif - - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SETZERO_H) */ -/* :: End simde/x86/avx512/setzero.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/setone.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_SETONE_H) -#define SIMDE_X86_AVX512_SETONE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_setone_si512(void) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - } - - return simde__m512i_from_private(r_); -} -#define simde_x_mm512_setone_epi32() simde_x_mm512_setone_si512() - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_x_mm512_setone_ps(void) { - return simde_mm512_castsi512_ps(simde_x_mm512_setone_si512()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_x_mm512_setone_pd(void) { - return simde_mm512_castsi512_pd(simde_x_mm512_setone_si512()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_x_mm512_setone_ph(void) { - return simde_mm512_castsi512_ph(simde_x_mm512_setone_si512()); -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SETONE_H) */ -/* :: End simde/x86/avx512/setone.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(__clang__) && SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 -SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmp_epi8_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 <= b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] <= b_.i8[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 != b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] != b_.i8[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), ~(a_.i8 < b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = !(a_.i8[i] < b_.i8[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), ~(a_.i8 <= b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = !(a_.i8[i] <= b_.i8[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi8_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_cmp_epi8_mask(a, b, imm8) _mm512_cmp_epi8_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epi8_mask - #define _mm512_cmp_epi8_mask(a, b, imm8) simde_mm512_cmp_epi8_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmp_epi32_mask (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 == b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 <= b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 != b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] != b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = !(a_.i32[i] < b_.i32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.i32 <= b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = !(a_.i32[i] <= b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m256i_to_private(simde_x_mm256_setone_si256()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm256_movepi32_mask(simde__m256i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_cmp_epi32_mask(a, b, imm8) _mm256_cmp_epi32_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_epi32_mask - #define _mm256_cmp_epi32_mask(a, b, imm8) simde_mm256_cmp_epi32_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmp_ps_mask (simde__m512 a, simde__m512 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? 
[Remainder of the deleted, auto-generated SIMDE AVX-512 headers omitted here; these hunks remove the following vendored files in full:]

  simde/x86/avx512/cmp.h   -- simde_mm512_cmp_{ps,pd,ph}_mask and simde_mm512_cmp_ep{i,u}{16,32,64}_mask, plus the 128/256-bit variants; each function pairs a native-intrinsic path with a portable per-element fallback loop.
  simde/x86/avx512/cmpeq.h -- simde_mm512_cmpeq_* and mask_cmpeq_* helpers for epi8/epi32/epi64/epu16/ps/pd, layered on the cmp functions above.
  simde/x86/avx512/movm.h  -- simde_mm_/mm256_/mm512_movm_epi{8,16,32,64}, which expand a bit-mask into vectors of all-ones / all-zero lanes.
  simde/x86/avx512/cmpge.h -- greater-or-equal comparisons built on movm.h; its removal continues beyond this hunk.

  Each file carries the SIMDE MIT license text and repeated "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY" markers.
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_cmpge_epi8_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cmpge_epi8_mask(a, b); - #else - return simde_mm_movepi8_mask(simde_x_mm_cmpge_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpge_epi8_mask - #define _mm_cmpge_epi8_mask(a, b) simde_mm_cmpge_epi8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_mask_cmpge_epi8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_cmpge_epi8_mask(k, a, b); - #else - return k & simde_mm_cmpge_epi8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpge_epi8_mask - #define _mm_mask_cmpge_epi8_mask(k, a, b) simde_mm_mask_cmpge_epi8_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmpge_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm256_movm_epi8(_mm256_cmpge_epi8_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epi8(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 >= b_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] >= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_cmpge_epi8_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cmpge_epi8_mask(a, b); - #else - return simde_mm256_movepi8_mask(simde_x_mm256_cmpge_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpge_epi8_mask - #define _mm256_cmpge_epi8_mask(a, b) simde_mm256_cmpge_epi8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_mask_cmpge_epi8_mask(simde__mmask32 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_cmpge_epi8_mask(k, a, b); - #else - return k & simde_mm256_cmpge_epi8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpge_epi8_mask - #define _mm256_mask_cmpge_epi8_mask(k, a, b) simde_mm256_mask_cmpge_epi8_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmpge_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm512_movm_epi8(_mm512_cmpge_epi8_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epi8(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmpge_epi8(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 >= b_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] >= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmpge_epi8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpge_epi8_mask(a, b); - #else - return simde_mm512_movepi8_mask(simde_x_mm512_cmpge_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epi8_mask - #define _mm512_cmpge_epi8_mask(a, b) simde_mm512_cmpge_epi8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_mask_cmpge_epi8_mask(simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_cmpge_epi8_mask(k, a, b); - #else - return k & simde_mm512_cmpge_epi8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpge_epi8_mask - #define _mm512_mask_cmpge_epi8_mask(k, a, b) simde_mm512_mask_cmpge_epi8_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmpge_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_movm_epi8(_mm_cmpge_epu8_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgeq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpge(a_.altivec_u8, b_.altivec_u8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_cmpge_epu8_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cmpge_epu8_mask(a, b); - #else - return simde_mm_movepi8_mask(simde_x_mm_cmpge_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpge_epu8_mask - #define _mm_cmpge_epu8_mask(a, b) simde_mm_cmpge_epu8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_mask_cmpge_epu8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_cmpge_epu8_mask(k, a, b); - #else - return k & simde_mm_cmpge_epu8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpge_epu8_mask - #define _mm_mask_cmpge_epu8_mask(k, a, b) simde_mm_mask_cmpge_epu8_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmpge_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm256_movm_epi8(_mm256_cmpge_epu8_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epu8(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_cmpge_epu8_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cmpge_epu8_mask(a, b); - #else - return simde_mm256_movepi8_mask(simde_x_mm256_cmpge_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpge_epu8_mask - #define _mm256_cmpge_epu8_mask(a, b) simde_mm256_cmpge_epu8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_mask_cmpge_epu8_mask(simde__mmask32 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_cmpge_epu8_mask(k, a, b); - #else - return k & simde_mm256_cmpge_epu8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpge_epu8_mask - #define _mm256_mask_cmpge_epu8_mask(k, a, b) simde_mm256_mask_cmpge_epu8_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmpge_epu8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm512_movm_epi8(_mm512_cmpge_epu8_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epu8(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmpge_epu8(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmpge_epu8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpge_epu8_mask(a, b); - #else - return simde_mm512_movepi8_mask(simde_x_mm512_cmpge_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epu8_mask - #define _mm512_cmpge_epu8_mask(a, b) simde_mm512_cmpge_epu8_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_mask_cmpge_epu8_mask(simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_cmpge_epu8_mask(k, a, b); - #else - return k & simde_mm512_cmpge_epu8_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpge_epu8_mask - #define _mm512_mask_cmpge_epu8_mask(k, a, b) simde_mm512_mask_cmpge_epu8_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmpge_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_movm_epi16(_mm_cmpge_epi16_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgeq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpge(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmpge_epi16_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cmpge_epi16_mask(a, b); - #else - return simde_mm_movepi16_mask(simde_x_mm_cmpge_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpge_epi16_mask - #define _mm_cmpge_epi16_mask(a, b) simde_mm_cmpge_epi16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmpge_epi16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_cmpge_epi16_mask(k, a, b); - #else - return k & simde_mm_cmpge_epi16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpge_epi16_mask - #define _mm_mask_cmpge_epi16_mask(k, a, b) simde_mm_mask_cmpge_epi16_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmpge_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm256_movm_epi16(_mm256_cmpge_epi16_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epi16(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_cmpge_epi16_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cmpge_epi16_mask(a, b); - #else - return simde_mm256_movepi16_mask(simde_x_mm256_cmpge_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpge_epi16_mask - #define _mm256_cmpge_epi16_mask(a, b) simde_mm256_cmpge_epi16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_mask_cmpge_epi16_mask(simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_cmpge_epi16_mask(k, a, b); - #else - return k & simde_mm256_cmpge_epi16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpge_epi16_mask - #define _mm256_mask_cmpge_epi16_mask(k, a, b) simde_mm256_mask_cmpge_epi16_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmpge_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm512_movm_epi16(_mm512_cmpge_epi16_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epi16(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmpge_epi16(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_cmpge_epi16_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpge_epi16_mask(a, b); - #else - return simde_mm512_movepi16_mask(simde_x_mm512_cmpge_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epi16_mask - #define _mm512_cmpge_epi16_mask(a, b) simde_mm512_cmpge_epi16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_mask_cmpge_epi16_mask(simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_cmpge_epi16_mask(k, a, b); - #else - return k & simde_mm512_cmpge_epi16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpge_epi16_mask - #define _mm512_mask_cmpge_epi16_mask(k, a, b) simde_mm512_mask_cmpge_epi16_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmpge_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_movm_epi16(_mm_cmpge_epu16_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgeq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpge(a_.altivec_u16, b_.altivec_u16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmpge_epu16_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cmpge_epu16_mask(a, b); - #else - return simde_mm_movepi16_mask(simde_x_mm_cmpge_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpge_epu16_mask - #define _mm_cmpge_epu16_mask(a, b) simde_mm_cmpge_epu16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmpge_epu16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_cmpge_epu16_mask(k, a, b); - #else - return k & simde_mm_cmpge_epu16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpge_epu16_mask - #define _mm_mask_cmpge_epu16_mask(k, a, b) simde_mm_mask_cmpge_epu16_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmpge_epu16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm256_movm_epi16(_mm256_cmpge_epu16_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epu16(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_cmpge_epu16_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cmpge_epu16_mask(a, b); - #else - return simde_mm256_movepi16_mask(simde_x_mm256_cmpge_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpge_epu16_mask - #define _mm256_cmpge_epu16_mask(a, b) simde_mm256_cmpge_epu16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_mask_cmpge_epu16_mask(simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_cmpge_epu16_mask(k, a, b); - #else - return k & simde_mm256_cmpge_epu16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpge_epu16_mask - #define _mm256_mask_cmpge_epu16_mask(k, a, b) simde_mm256_mask_cmpge_epu16_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmpge_epu16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return simde_mm512_movm_epi16(_mm512_cmpge_epu16_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epu16(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmpge_epu16(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_cmpge_epu16_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpge_epu16_mask(a, b); - #else - return simde_mm512_movepi16_mask(simde_x_mm512_cmpge_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epu16_mask - #define _mm512_cmpge_epu16_mask(a, b) simde_mm512_cmpge_epu16_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_mask_cmpge_epu16_mask(simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_cmpge_epu16_mask(k, a, b); - #else - return k & simde_mm512_cmpge_epu16_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpge_epu16_mask - #define _mm512_mask_cmpge_epu16_mask(k, a, b) simde_mm512_mask_cmpge_epu16_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmpge_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm_movm_epi32(_mm_cmpge_epi32_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgeq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpge(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmpge_epi32_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_cmpge_epi32_mask(a, b); - #else - return simde_mm_movepi32_mask(simde_x_mm_cmpge_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpge_epi32_mask - #define _mm_cmpge_epi32_mask(a, b) simde_mm_cmpge_epi32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmpge_epi32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_cmpge_epi32_mask(k, a, b); - #else - return k & simde_mm_cmpge_epi32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpge_epi32_mask - #define _mm_mask_cmpge_epi32_mask(k, a, b) simde_mm_mask_cmpge_epi32_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmpge_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm256_movm_epi32(_mm256_cmpge_epi32_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epi32(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmpge_epi32_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_cmpge_epi32_mask(a, b); - #else - return simde_mm256_movepi32_mask(simde_x_mm256_cmpge_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpge_epi32_mask - #define _mm256_cmpge_epi32_mask(a, b) simde_mm256_cmpge_epi32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_mask_cmpge_epi32_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_cmpge_epi32_mask(k, a, b); - #else - return k & simde_mm256_cmpge_epi32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpge_epi32_mask - #define _mm256_mask_cmpge_epi32_mask(k, a, b) simde_mm256_mask_cmpge_epi32_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmpge_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return simde_mm512_movm_epi32(_mm512_cmpge_epi32_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epi32(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmpge_epi32(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = 
HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmpge_epi32_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpge_epi32_mask(a, b); - #else - return simde_mm512_movepi32_mask(simde_x_mm512_cmpge_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epi32_mask - #define _mm512_cmpge_epi32_mask(a, b) simde_mm512_cmpge_epi32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_mask_cmpge_epi32_mask(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpge_epi32_mask(k, a, b); - #else - return k & simde_mm512_cmpge_epi32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpge_epi32_mask - #define _mm512_mask_cmpge_epi32_mask(k, a, b) simde_mm512_mask_cmpge_epi32_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmpge_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm_movm_epi32(_mm_cmpge_epu32_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgeq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a_.altivec_u32, b_.altivec_u32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmpge_epu32_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_cmpge_epu32_mask(a, b); - #else - return simde_mm_movepi32_mask(simde_x_mm_cmpge_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpge_epu32_mask - #define _mm_cmpge_epu32_mask(a, b) simde_mm_cmpge_epu32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmpge_epu32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_cmpge_epu32_mask(k, a, b); - #else - return k & simde_mm_cmpge_epu32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpge_epu32_mask - #define _mm_mask_cmpge_epu32_mask(k, a, b) simde_mm_mask_cmpge_epu32_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmpge_epu32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm256_movm_epi32(_mm256_cmpge_epu32_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epu32(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmpge_epu32_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_cmpge_epu32_mask(a, b); - #else - return simde_mm256_movepi32_mask(simde_x_mm256_cmpge_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpge_epu32_mask - #define _mm256_cmpge_epu32_mask(a, b) simde_mm256_cmpge_epu32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_mask_cmpge_epu32_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_cmpge_epu32_mask(k, a, b); - #else - return k & simde_mm256_cmpge_epu32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpge_epu32_mask - #define _mm256_mask_cmpge_epu32_mask(k, a, b) simde_mm256_mask_cmpge_epu32_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmpge_epu32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return simde_mm512_movm_epi32(_mm512_cmpge_epu32_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epu32(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmpge_epu32(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = 
HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmpge_epu32_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpge_epu32_mask(a, b); - #else - return simde_mm512_movepi32_mask(simde_x_mm512_cmpge_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epu32_mask - #define _mm512_cmpge_epu32_mask(a, b) simde_mm512_cmpge_epu32_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_mask_cmpge_epu32_mask(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpge_epu32_mask(k, a, b); - #else - return k & simde_mm512_cmpge_epu32_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpge_epu32_mask - #define _mm512_mask_cmpge_epu32_mask(k, a, b) simde_mm512_mask_cmpge_epu32_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmpge_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm_movm_epi64(_mm_cmpge_epi64_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgeq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpge(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmpge_epi64_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_cmpge_epi64_mask(a, b); - #else - return simde_mm_movepi64_mask(simde_x_mm_cmpge_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpge_epi64_mask - #define _mm_cmpge_epi64_mask(a, b) simde_mm_cmpge_epi64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmpge_epi64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_cmpge_epi64_mask(k, a, b); - #else - return k & simde_mm_cmpge_epi64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpge_epi64_mask - #define _mm_mask_cmpge_epi64_mask(k, a, b) simde_mm_mask_cmpge_epi64_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmpge_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm256_movm_epi64(_mm256_cmpge_epi64_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epi64(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmpge_epi64_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_cmpge_epi64_mask(a, b); - #else - return simde_mm256_movepi64_mask(simde_x_mm256_cmpge_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpge_epi64_mask - #define _mm256_cmpge_epi64_mask(a, b) simde_mm256_cmpge_epi64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_mask_cmpge_epi64_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_cmpge_epi64_mask(k, a, b); - #else - return k & simde_mm256_cmpge_epi64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpge_epi64_mask - #define _mm256_mask_cmpge_epi64_mask(k, a, b) simde_mm256_mask_cmpge_epi64_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmpge_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return simde_mm512_movm_epi64(_mm512_cmpge_epi64_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epi64(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmpge_epi64(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = 
HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmpge_epi64_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpge_epi64_mask(a, b); - #else - return simde_mm512_movepi64_mask(simde_x_mm512_cmpge_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epi64_mask - #define _mm512_cmpge_epi64_mask(a, b) simde_mm512_cmpge_epi64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_mask_cmpge_epi64_mask(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpge_epi64_mask(k, a, b); - #else - return k & simde_mm512_cmpge_epi64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpge_epi64_mask - #define _mm512_mask_cmpge_epi64_mask(k, a, b) simde_mm512_mask_cmpge_epi64_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cmpge_epu64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm_movm_epi64(_mm_cmpge_epu64_mask(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgeq_u64(a_.neon_u64, b_.neon_u64); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a_.altivec_u64, b_.altivec_u64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 >= b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] >= b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_cmpge_epu64_mask (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_cmpge_epu64_mask(a, b); - #else - return simde_mm_movepi64_mask(simde_x_mm_cmpge_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpge_epu64_mask - #define _mm_cmpge_epu64_mask(a, b) simde_mm_cmpge_epu64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_mask_cmpge_epu64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_cmpge_epu64_mask(k, a, b); - #else - return k & simde_mm_cmpge_epu64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_cmpge_epu64_mask - #define _mm_mask_cmpge_epu64_mask(k, a, b) simde_mm_mask_cmpge_epu64_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_cmpge_epu64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm256_movm_epi64(_mm256_cmpge_epu64_mask(a, b)); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epu64(a_.m128i[i], b_.m128i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 >= b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] >= b_.u64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmpge_epu64_mask (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_cmpge_epu64_mask(a, b); - #else - return simde_mm256_movepi64_mask(simde_x_mm256_cmpge_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpge_epu64_mask - #define _mm256_cmpge_epu64_mask(a, b) simde_mm256_cmpge_epu64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_mask_cmpge_epu64_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_cmpge_epu64_mask(k, a, b); - #else - return k & simde_mm256_cmpge_epu64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmpge_epu64_mask - #define _mm256_mask_cmpge_epu64_mask(k, a, b) simde_mm256_mask_cmpge_epu64_mask((k), (a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_cmpge_epu64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return simde_mm512_movm_epi64(_mm512_cmpge_epu64_mask(a, b)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_cmpge_epu64(a_.m128i[i], b_.m128i[i]); - } - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_cmpge_epu64(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = 
HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 >= b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] >= b_.u64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmpge_epu64_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpge_epu64_mask(a, b); - #else - return simde_mm512_movepi64_mask(simde_x_mm512_cmpge_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epu64_mask - #define _mm512_cmpge_epu64_mask(a, b) simde_mm512_cmpge_epu64_mask((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_mask_cmpge_epu64_mask(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpge_epu64_mask(k, a, b); - #else - return k & simde_mm512_cmpge_epu64_mask(a, b); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpge_epu64_mask - #define _mm512_mask_cmpge_epu64_mask(k, a, b) simde_mm512_mask_cmpge_epu64_mask((k), (a), (b)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_CMPGE_H) */ -/* :: End simde/x86/avx512/cmpge.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/cmpgt.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_CMPGT_H) -#define SIMDE_X86_AVX512_CMPGT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmpgt_epi8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpgt_epi8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && !defined(HEDLEY_INTEL_VERSION) - r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - const uint32_t t = HEDLEY_STATIC_CAST(uint32_t, simde_mm256_movemask_epi8(simde_mm256_cmpgt_epi8(a_.m256i[i], b_.m256i[i]))); - r |= HEDLEY_STATIC_CAST(uint64_t, t) << HEDLEY_STATIC_CAST(uint64_t, i * 32); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.i8 > b_.i8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] > b_.i8[i]) ? (UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpgt_epi8_mask - #define _mm512_cmpgt_epi8_mask(a, b) simde_mm512_cmpgt_epi8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmpgt_epu8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpgt_epu8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r = 0; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.u8 > b_.u8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[i] > b_.u8[i]) ? 
(UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpgt_epu8_mask - #define _mm512_cmpgt_epu8_mask(a, b) simde_mm512_cmpgt_epu8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_cmpgt_epi16_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpgt_epi16_mask(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_cmpgt_epi16(a_.m256i[i], b_.m256i[i]); - } - - return simde_mm512_movepi16_mask(simde__m512i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpgt_epi16_mask - #define _mm512_cmpgt_epi16_mask(a, b) simde_mm512_cmpgt_epi16_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmpgt_epi32_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpgt_epi32_mask(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_cmpgt_epi32(a_.m256i[i], b_.m256i[i]); - } - - return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpgt_epi32_mask - #define _mm512_cmpgt_epi32_mask(a, b) simde_mm512_cmpgt_epi32_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpgt_epi32_mask(k1, a, b); - #else - return simde_mm512_cmpgt_epi32_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpgt_epi32_mask - #define _mm512_mask_cmpgt_epi32_mask(k1, a, b) simde_mm512_mask_cmpgt_epi32_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmpgt_epi64_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpgt_epi64_mask(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_cmpgt_epi64(a_.m256i[i], b_.m256i[i]); - } - - return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpgt_epi64_mask - #define _mm512_cmpgt_epi64_mask(a, b) simde_mm512_cmpgt_epi64_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpgt_epi64_mask(k1, a, b); - #else - return simde_mm512_cmpgt_epi64_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpgt_epi64_mask - #define _mm512_mask_cmpgt_epi64_mask(k1, a, b) simde_mm512_mask_cmpgt_epi64_mask(k1, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_CMPGT_H) */ -/* :: End simde/x86/avx512/cmpgt.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin 
simde/x86/avx512/cmplt.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_CMPLT_H) -#define SIMDE_X86_AVX512_CMPLT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmplt_ps_mask (simde__m512 a, simde__m512 b) { - return simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmplt_ps_mask - #define _mm512_cmplt_ps_mask(a, b) simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmplt_pd_mask (simde__m512d a, simde__m512d b) { - return simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_LT_OQ); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmplt_pd_mask - #define _mm512_cmplt_pd_mask(a, b) simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_LT_OQ) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmplt_epi8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmplt_epi8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r = 0; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.i8 < b_.i8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] < b_.i8[i]) ? 
(UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmplt_epi8_mask - #define _mm512_cmplt_epi8_mask(a, b) simde_mm512_cmplt_epi8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmplt_epu8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmplt_epu8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r = 0; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.u8 < b_.u8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[i] < b_.u8[i]) ? (UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmplt_epu8_mask - #define _mm512_cmplt_epu8_mask(a, b) simde_mm512_cmplt_epu8_mask(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_CMPLT_H) */ -/* :: End simde/x86/avx512/cmplt.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/extract.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_EXTRACT_H) -#define SIMDE_X86_AVX512_EXTRACT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_extractf32x4_ps (simde__m256 a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256_private a_ = simde__m256_to_private(a); - - return a_.m128[imm8 & 1]; -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_extractf32x4_ps(a, imm8) _mm256_extractf32x4_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf32x4_ps - #define _mm256_extractf32x4_ps(a, imm8) simde_mm256_extractf32x4_ps((a), (imm8)) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm512_extractf32x4_ps (simde__m512 a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512_private a_ = simde__m512_to_private(a); - - /* GCC 6 generates an ICE */ - #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(7,0,0) - return a_.m128[imm8 & 3]; - #else - simde__m128_private r_; - const size_t offset = HEDLEY_STATIC_CAST(size_t, imm8 & 3) * (sizeof(r_.f32) / sizeof(r_.f32[0])); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i + offset]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - #define simde_mm512_extractf32x4_ps(a, imm8) _mm512_extractf32x4_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_extractf32x4_ps - #define _mm512_extractf32x4_ps(a, imm8) simde_mm512_extractf32x4_ps((a), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - #define simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) _mm512_mask_extractf32x4_ps(src, k, a, imm8) -#else - #define simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) simde_mm_mask_mov_ps((src), (k), simde_mm512_extractf32x4_ps((a), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extractf32x4_ps - #define _mm512_mask_extractf32x4_ps(src, k, a, imm8) simde_mm512_mask_extractf32x4_ps((src), (k), (a), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - #define simde_mm512_maskz_extractf32x4_ps(k, a, imm8) _mm512_maskz_extractf32x4_ps(k, a, imm8) -#else - #define simde_mm512_maskz_extractf32x4_ps(k, a, imm8) simde_mm_maskz_mov_ps((k), simde_mm512_extractf32x4_ps((a), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extractf32x4_ps - #define _mm512_maskz_extractf32x4_ps(k, a, imm8) simde_mm512_maskz_extractf32x4_ps((k), (a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm512_extractf32x8_ps (simde__m512 a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512_private a_ = simde__m512_to_private(a); - - return a_.m256[imm8 & 1]; -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_extractf32x8_ps(a, imm8) _mm512_extractf32x8_ps(a, imm8) 
-#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_extractf32x8_ps - #define _mm512_extractf32x8_ps(a, imm8) simde_mm512_extractf32x8_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm512_extractf64x4_pd (simde__m512d a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512d_private a_ = simde__m512d_to_private(a); - - return a_.m256d[imm8 & 1]; -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - #define simde_mm512_extractf64x4_pd(a, imm8) _mm512_extractf64x4_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_extractf64x4_pd - #define _mm512_extractf64x4_pd(a, imm8) simde_mm512_extractf64x4_pd(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - #define simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) _mm512_mask_extractf64x4_pd(src, k, a, imm8) -#else - #define simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm512_extractf64x4_pd(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extractf64x4_pd - #define _mm512_mask_extractf64x4_pd(src, k, a, imm8) simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - #define simde_mm512_maskz_extractf64x4_pd(k, a, imm8) _mm512_maskz_extractf64x4_pd(k, a, imm8) -#else - #define simde_mm512_maskz_extractf64x4_pd(k, a, imm8) simde_mm256_maskz_mov_pd(k, simde_mm512_extractf64x4_pd(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extractf64x4_pd - #define _mm512_maskz_extractf64x4_pd(k, a, imm8) simde_mm512_maskz_extractf64x4_pd(k, a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_extracti32x4_epi32 (simde__m512i a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - return a_.m128i[imm8 & 3]; -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_extracti32x4_epi32(a, imm8) _mm512_extracti32x4_epi32(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_extracti32x4_epi32 - #define _mm512_extracti32x4_epi32(a, imm8) simde_mm512_extracti32x4_epi32(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) _mm512_mask_extracti32x4_epi32(src, k, a, imm8) -#else - #define simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) simde_mm_mask_mov_epi32(src, k, simde_mm512_extracti32x4_epi32(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extracti32x4_epi32 - #define _mm512_mask_extracti32x4_epi32(src, k, a, imm8) simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) _mm512_maskz_extracti32x4_epi32(k, a, imm8) -#else - #define simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) simde_mm_maskz_mov_epi32(k, simde_mm512_extracti32x4_epi32(a, 
imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extracti32x4_epi32 - #define _mm512_maskz_extracti32x4_epi32(k, a, imm8) simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_extracti32x8_epi32 (simde__m512i a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - return a_.m256i[imm8 & 1]; -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_extracti32x8_epi32(a, imm8) _mm512_extracti32x8_epi32(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_extracti32x8_epi32 - #define _mm512_extracti32x8_epi32(a, imm8) simde_mm512_extracti32x8_epi32((a), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX51FDQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_mask_extracti32x8_epi32(src, k, a, imm8) _mm512_mask_extracti32x8_epi32(src, k, a, imm8) -#else - #define simde_mm512_mask_extracti32x8_epi32(src, k, a, imm8) simde_mm256_mask_mov_epi32((src), (k), simde_mm512_extracti32x8_epi32((a), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extracti32x8_epi32 - #define _mm512_mask_extracti32x8_epi32(src, k, a, imm8) simde_mm512_mask_extracti32x8_epi32((src), (k), (a), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_maskz_extracti32x8_epi32(k, a, imm8) _mm512_maskz_extracti32x8_epi32(k, a, imm8) -#else - #define simde_mm512_maskz_extracti32x8_epi32(k, a, imm8) simde_mm256_maskz_mov_epi32((k), simde_mm512_extracti32x8_epi32((a), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extracti32x8_epi32 - #define _mm512_maskz_extracti32x8_epi32(k, a, imm8) simde_mm512_maskz_extracti32x8_epi32((k), (a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_extracti64x4_epi64 (simde__m512i a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - return a_.m256i[imm8 & 1]; -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_extracti64x4_epi64(a, imm8) _mm512_extracti64x4_epi64(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_extracti64x4_epi64 - #define _mm512_extracti64x4_epi64(a, imm8) simde_mm512_extracti64x4_epi64((a), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) _mm512_mask_extracti64x4_epi64(src, k, a, imm8) -#else - #define simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) simde_mm256_mask_mov_epi64((src), (k), simde_mm512_extracti64x4_epi64((a), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extracti64x4_epi64 - #define _mm512_mask_extracti64x4_epi64(src, k, a, imm8) simde_mm512_mask_extracti64x4_epi64((src), (k), (a), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || 
HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) - #define simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) _mm512_maskz_extracti64x4_epi64(k, a, imm8) -#else - #define simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) simde_mm256_maskz_mov_epi64((k), simde_mm512_extracti64x4_epi64((a), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extracti64x4_epi64 - #define _mm512_maskz_extracti64x4_epi64(k, a, imm8) simde_mm512_maskz_extracti64x4_epi64((k), (a), (imm8)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_EXTRACT_H) */ -/* :: End simde/x86/avx512/extract.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/insert.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_INSERT_H) -#define SIMDE_X86_AVX512_INSERT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_insertf32x4 (simde__m512 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - simde__m512 r; - switch(imm8) { - case 0: r = _mm512_insertf32x4(a, b, 0); break; - case 1: r = _mm512_insertf32x4(a, b, 1); break; - case 2: r = _mm512_insertf32x4(a, b, 2); break; - case 3: r = _mm512_insertf32x4(a, b, 3); break; - default: HEDLEY_UNREACHABLE(); r = simde_mm512_setzero_ps(); break; - } - return r; - #else - simde__m512_private a_ = simde__m512_to_private(a); - - a_.m128[imm8 & 3] = b; - - return simde__m512_from_private(a_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_insertf32x4 - #define _mm512_insertf32x4(a, b, imm8) simde_mm512_insertf32x4(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_insertf32x4 (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512 r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) - SIMDE_CONSTIFY_4_(_mm512_mask_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, src, k, a, b); - return r; - #else - SIMDE_CONSTIFY_4_(simde_mm512_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); - return simde_mm512_mask_mov_ps(src, k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_insertf32x4 - #define _mm512_mask_insertf32x4(src, k, a, b, imm8) simde_mm512_mask_insertf32x4(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_insertf32x4 (simde__mmask16 k, simde__m512 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512 r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) - SIMDE_CONSTIFY_4_(_mm512_maskz_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, k, a, b); - return r; - #else - SIMDE_CONSTIFY_4_(simde_mm512_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); - return simde_mm512_maskz_mov_ps(k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_insertf32x4 - #define _mm512_maskz_insertf32x4(k, a, b, imm8) simde_mm512_maskz_insertf32x4(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_insertf64x4 (simde__m512d a, simde__m256d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512d_private a_ = simde__m512d_to_private(a); - - a_.m256d[imm8 & 1] = b; - - return simde__m512d_from_private(a_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_insertf64x4(a, b, imm8) _mm512_insertf64x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_insertf64x4 - #define _mm512_insertf64x4(a, b, imm8) simde_mm512_insertf64x4(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_insertf64x4 (simde__m512d src, 
simde__mmask8 k, simde__m512d a, simde__m256d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512d r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_CONSTIFY_2_(_mm512_mask_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, src, k, a, b); - return r; - #else - SIMDE_CONSTIFY_2_(simde_mm512_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); - return simde_mm512_mask_mov_pd(src, k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_insertf64x4 - #define _mm512_mask_insertf64x4(src, k, a, b, imm8) simde_mm512_mask_insertf64x4(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_insertf64x4 (simde__mmask8 k, simde__m512d a, simde__m256d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512d r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_CONSTIFY_2_(_mm512_maskz_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, k, a, b); - return r; - #else - SIMDE_CONSTIFY_2_(simde_mm512_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); - return simde_mm512_maskz_mov_pd(k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_insertf64x4 - #define _mm512_maskz_insertf64x4(k, a, b, imm8) simde_mm512_maskz_insertf64x4(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_inserti32x4 (simde__m512i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - a_.m128i[imm8 & 3] = b; - - return simde__m512i_from_private(a_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_inserti32x4(a, b, imm8) _mm512_inserti32x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_inserti32x4 - #define _mm512_inserti32x4(a, b, imm8) simde_mm512_inserti32x4(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_inserti32x4 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512i r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) - SIMDE_CONSTIFY_4_(_mm512_mask_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, src, k, a, b); - return r; - #else - SIMDE_CONSTIFY_4_(simde_mm512_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); - return simde_mm512_mask_mov_epi32(src, k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_inserti32x4 - #define _mm512_mask_inserti32x4(src, k, a, b, imm8) simde_mm512_mask_inserti32x4(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_inserti32x4 (simde__mmask16 k, simde__m512i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512i r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) - SIMDE_CONSTIFY_4_(_mm512_maskz_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, k, a, b); - return r; - #else - SIMDE_CONSTIFY_4_(simde_mm512_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); - return simde_mm512_maskz_mov_epi32(k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_inserti32x4 - #define 
_mm512_maskz_inserti32x4(k, a, b, imm8) simde_mm512_maskz_inserti32x4(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_inserti64x4 (simde__m512i a, simde__m256i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - a_.m256i[imm8 & 1] = b; - - return simde__m512i_from_private(a_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_inserti64x4(a, b, imm8) _mm512_inserti64x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_inserti64x4 - #define _mm512_inserti64x4(a, b, imm8) simde_mm512_inserti64x4(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_inserti64x4 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m256i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 2) { - simde__m512i r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_CONSTIFY_2_(_mm512_mask_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, src, k, a, b); - return r; - #else - SIMDE_CONSTIFY_2_(simde_mm512_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); - return simde_mm512_mask_mov_epi64(src, k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_inserti64x4 - #define _mm512_mask_inserti64x4(src, k, a, b, imm8) simde_mm512_mask_inserti64x4(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_inserti64x4 (simde__mmask8 k, simde__m512i a, simde__m256i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 2) { - simde__m512i r; - - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_CONSTIFY_2_(_mm512_maskz_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, k, a, b); - return r; - #else - SIMDE_CONSTIFY_2_(simde_mm512_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); - return simde_mm512_maskz_mov_epi64(k, r); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_inserti64x4 - #define _mm512_maskz_inserti64x4(k, a, b, imm8) simde_mm512_maskz_inserti64x4(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_insertf32x8 (simde__m512 a, simde__m256 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512_private a_ = simde__m512_to_private(a); - - a_.m256[imm8 & 1] = b; - - return simde__m512_from_private(a_); -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_insertf32x8(a, b, imm8) _mm512_insertf32x8(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_insertf32x8 - #define _mm512_insertf32x8(a, b, imm8) simde_mm512_insertf32x8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_insertf32x8(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m256 b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512 r; - SIMDE_CONSTIFY_2_(_mm512_mask_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, src, k, a, b); - return r; - #else - simde__m512 r; - SIMDE_CONSTIFY_2_(simde_mm512_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); - return simde_mm512_mask_mov_ps(src, k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_insertf32x8 - #define _mm512_mask_insertf32x8(src, k, a, b, imm8) simde_mm512_mask_insertf32x8(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 
-simde_mm512_maskz_insertf32x8(simde__mmask16 k, simde__m512 a, simde__m256 b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512 r; - SIMDE_CONSTIFY_2_(_mm512_maskz_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, k, a, b); - return r; - #else - simde__m512 r; - SIMDE_CONSTIFY_2_(simde_mm512_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); - return simde_mm512_maskz_mov_ps(k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_insertf32x8 - #define _mm512_maskz_insertf32x8(k, a, b, imm8) simde_mm512_maskz_insertf32x8(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_insertf64x2 (simde__m512d a, simde__m128d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512d_private a_ = simde__m512d_to_private(a); - - a_.m128d[imm8 & 3] = b; - - return simde__m512d_from_private(a_); -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_insertf64x2(a, b, imm8) _mm512_insertf64x2(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_insertf64x2 - #define _mm512_insertf64x2(a, b, imm8) simde_mm512_insertf64x2(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_insertf64x2(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m128d b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512d r; - SIMDE_CONSTIFY_4_(_mm512_mask_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, src, k, a, b); - return r; - #else - simde__m512d r; - SIMDE_CONSTIFY_4_(simde_mm512_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); - return simde_mm512_mask_mov_pd(src, k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_insertf64x2 - #define _mm512_mask_insertf64x2(src, k, a, b, imm8) simde_mm512_mask_insertf64x2(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_insertf64x2(simde__mmask8 k, simde__m512d a, simde__m128d b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512d r; - SIMDE_CONSTIFY_4_(_mm512_maskz_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, k, a, b); - return r; - #else - simde__m512d r; - SIMDE_CONSTIFY_4_(simde_mm512_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); - return simde_mm512_maskz_mov_pd(k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_insertf64x2 - #define _mm512_maskz_insertf64x2(k, a, b, imm8) simde_mm512_maskz_insertf64x2(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_inserti32x8 (simde__m512i a, simde__m256i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - a_.m256i[imm8 & 1] = b; - - return simde__m512i_from_private(a_); -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_inserti32x8(a, b, imm8) _mm512_inserti32x8(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_inserti32x8 - #define _mm512_inserti32x8(a, b, imm8) simde_mm512_inserti32x8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_inserti32x8(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m256i b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512i r; - SIMDE_CONSTIFY_2_(_mm512_mask_inserti32x8, r, 
(HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, src, k, a, b); - return r; - #else - simde__m512i r; - SIMDE_CONSTIFY_2_(simde_mm512_inserti32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, a, b); - return simde_mm512_mask_mov_epi32(src, k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_inserti32x8 - #define _mm512_mask_inserti32x8(src, k, a, b, imm8) simde_mm512_mask_inserti32x8(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_inserti32x8(simde__mmask16 k, simde__m512i a, simde__m256i b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512i r; - SIMDE_CONSTIFY_2_(_mm512_maskz_inserti32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, k, a, b); - return r; - #else - simde__m512i r; - SIMDE_CONSTIFY_2_(simde_mm512_inserti32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, a, b); - return simde_mm512_maskz_mov_epi32(k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_inserti32x8 - #define _mm512_maskz_inserti32x8(k, a, b, imm8) simde_mm512_maskz_inserti32x8(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_inserti64x2 (simde__m512i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - a_.m128i[imm8 & 3] = b; - - return simde__m512i_from_private(a_); -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_inserti64x2(a, b, imm8) _mm512_inserti64x2(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_inserti64x2 - #define _mm512_inserti64x2(a, b, imm8) simde_mm512_inserti64x2(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_inserti64x2(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m128i b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512i r; - SIMDE_CONSTIFY_4_(_mm512_mask_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, src, k, a, b); - return r; - #else - simde__m512i r; - SIMDE_CONSTIFY_4_(simde_mm512_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); - return simde_mm512_mask_mov_epi64(src, k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_inserti64x2 - #define _mm512_mask_inserti64x2(src, k, a, b, imm8) simde_mm512_mask_inserti64x2(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_inserti64x2(simde__mmask8 k, simde__m512i a, simde__m128i b, const int imm8) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - simde__m512i r; - SIMDE_CONSTIFY_4_(_mm512_maskz_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, k, a, b); - return r; - #else - simde__m512i r; - SIMDE_CONSTIFY_4_(simde_mm512_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); - return simde_mm512_maskz_mov_epi64(k, r); - #endif - } -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_inserti64x2 - #define _mm512_maskz_inserti64x2(k, a, b, imm8) simde_mm512_maskz_inserti64x2(k, a, b, imm8) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_INSERT_H) */ -/* :: End simde/x86/avx512/insert.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/kshift.h :: */ -/* 
SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_KSHIFT_H) -#define SIMDE_X86_AVX512_KSHIFT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_kshiftli_mask16 (simde__mmask16 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return HEDLEY_STATIC_CAST(simde__mmask16, (count <= 15) ? (a << count) : 0); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftli_mask16(a, count) _kshiftli_mask16(a, count) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _kshiftli_mask16 - #define _kshiftli_mask16(a, count) simde_kshiftli_mask16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_kshiftli_mask32 (simde__mmask32 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return (count <= 31) ? (a << count) : 0; -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftli_mask32(a, count) _kshiftli_mask32(a, count) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _kshiftli_mask32 - #define _kshiftli_mask32(a, count) simde_kshiftli_mask32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_kshiftli_mask64 (simde__mmask64 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return (count <= 63) ? 
(a << count) : 0; -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftli_mask64(a, count) _kshiftli_mask64(a, count) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _kshiftli_mask64 - #define _kshiftli_mask64(a, count) simde_kshiftli_mask64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_kshiftli_mask8 (simde__mmask8 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return HEDLEY_STATIC_CAST(simde__mmask8, (count <= 7) ? (a << count) : 0); -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftli_mask8(a, count) _kshiftli_mask8(a, count) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _kshiftli_mask8 - #define _kshiftli_mask8(a, count) simde_kshiftli_mask8(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_kshiftri_mask16 (simde__mmask16 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return HEDLEY_STATIC_CAST(simde__mmask16, (count <= 15) ? (a >> count) : 0); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftri_mask16(a, count) _kshiftri_mask16(a, count) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _kshiftri_mask16 - #define _kshiftri_mask16(a, count) simde_kshiftri_mask16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_kshiftri_mask32 (simde__mmask32 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return (count <= 31) ? (a >> count) : 0; -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftri_mask32(a, count) _kshiftri_mask32(a, count) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _kshiftri_mask32 - #define _kshiftri_mask32(a, count) simde_kshiftri_mask32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_kshiftri_mask64 (simde__mmask64 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return (count <= 63) ? (a >> count) : 0; -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftri_mask64(a, count) _kshiftri_mask64(a, count) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _kshiftri_mask64 - #define _kshiftri_mask64(a, count) simde_kshiftri_mask64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_kshiftri_mask8 (simde__mmask8 a, unsigned int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - return HEDLEY_STATIC_CAST(simde__mmask8, (count <= 7) ? 
(a >> count) : 0); -} -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) - #define simde_kshiftri_mask8(a, count) _kshiftri_mask8(a, count) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _kshiftri_mask8 - #define _kshiftri_mask8(a, count) simde_kshiftri_mask8(a, count) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_KSHIFT_H) */ -/* :: End simde/x86/avx512/kshift.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/permutex2var.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_PERMUTEX2VAR_H) -#define SIMDE_X86_AVX512_PERMUTEX2VAR_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/andnot.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_ANDNOT_H) -#define SIMDE_X86_AVX512_ANDNOT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_andnot_ps(a, b) _mm512_andnot_ps(a, b) -#else - #define simde_mm512_andnot_ps(a, b) simde_mm512_castsi512_ps(simde_mm512_andnot_si512(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_andnot_ps - #define _mm512_andnot_ps(a, b) simde_mm512_andnot_ps(a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_mask_andnot_ps(src, k, a, b) _mm512_mask_andnot_ps((src), (k), (a), (b)) -#else - #define simde_mm512_mask_andnot_ps(src, k, a, b) simde_mm512_castsi512_ps(simde_mm512_mask_andnot_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_andnot_ps - #define _mm512_mask_andnot_ps(src, k, a, b) simde_mm512_mask_andnot_ps(src, k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_maskz_andnot_ps(k, a, b) _mm512_maskz_andnot_ps((k), (a), (b)) -#else - #define simde_mm512_maskz_andnot_ps(k, a, b) simde_mm512_castsi512_ps(simde_mm512_maskz_andnot_epi32(k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_andnot_ps - #define _mm512_maskz_andnot_ps(k, a, b) simde_mm512_maskz_andnot_ps(k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_andnot_pd(a, b) _mm512_andnot_pd(a, b) -#else - #define simde_mm512_andnot_pd(a, b) simde_mm512_castsi512_pd(simde_mm512_andnot_si512(simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_andnot_pd - #define _mm512_andnot_pd(a, b) simde_mm512_andnot_pd(a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_mask_andnot_pd(src, k, a, b) _mm512_mask_andnot_pd((src), (k), (a), (b)) -#else - #define simde_mm512_mask_andnot_pd(src, k, a, b) simde_mm512_castsi512_pd(simde_mm512_mask_andnot_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_andnot_pd - #define _mm512_mask_andnot_pd(src, k, a, b) simde_mm512_mask_andnot_pd(src, k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_maskz_andnot_pd(k, a, b) _mm512_maskz_andnot_pd((k), (a), (b)) -#else - #define simde_mm512_maskz_andnot_pd(k, a, b) simde_mm512_castsi512_pd(simde_mm512_maskz_andnot_epi64(k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef 
[Deleted vendored code continues: the remainder of simde/x86/avx512/andnot.h (simde_mm512_andnot_si512 and its mask/maskz epi32/epi64 wrappers), followed in full by simde/x86/avx512/blend.h, or.h, slli.h, srli.h, and test.h, each with its repeated MIT license text and "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY" markers, and then the generic simde_x_permutex2var helpers and the simde_mm_permutex2var_{epi8,epi16,epi32,epi64,pd,ps} family (mask/mask2/maskz variants, with x86 SSE, ARM NEON, POWER AltiVec, and WASM SIMD128 fallback paths). The hunk removing the bundled SIMDe AVX-512 header continues beyond this point.]
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutex2var_epi16 (simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutex2var_epi16(a, idx, b); - #elif defined(SIMDE_X86_AVX2_NATIVE) - __m256i hilo, hilo2, hi, lo, idx2, ta, tb, select; - const __m256i ones = _mm256_set1_epi16(1); - - idx2 = _mm256_srli_epi32(idx, 1); - - ta = _mm256_permutevar8x32_epi32(a, idx2); - tb = _mm256_permutevar8x32_epi32(b, idx2); - select = _mm256_slli_epi32(idx2, 28); - hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - idx2 = _mm256_srli_epi32(idx2, 16); - - ta = _mm256_permutevar8x32_epi32(a, idx2); - tb = _mm256_permutevar8x32_epi32(b, idx2); - select = _mm256_slli_epi32(idx2, 28); - hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - - lo = _mm256_blend_epi16(_mm256_slli_epi32(hilo2, 16), hilo, 0x55); - hi = _mm256_blend_epi16(hilo2, _mm256_srli_epi32(hilo, 16), 0x55); - - select = _mm256_cmpeq_epi16(_mm256_and_si256(idx, ones), ones); - return _mm256_blendv_epi8(lo, hi, select); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - idx_ = simde__m256i_to_private(idx), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 1, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((idx_.i16[i] & 0x10) ? b_ : a_).i16[idx_.i16[i] & 0x0F]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutex2var_epi16 - #define _mm256_permutex2var_epi16(a, idx, b) simde_mm256_permutex2var_epi16(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_permutex2var_epi16 (simde__m256i a, simde__mmask16 k, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutex2var_epi16(a, k, idx, b); - #else - return simde_mm256_mask_mov_epi16(a, k, simde_mm256_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutex2var_epi16 -#define _mm256_mask_permutex2var_epi16(a, k, idx, b) simde_mm256_mask_permutex2var_epi16(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask2_permutex2var_epi16 (simde__m256i a, simde__m256i idx, simde__mmask16 k, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask2_permutex2var_epi16(a, idx, k, b); - #else - return simde_mm256_mask_mov_epi16(idx, k, simde_mm256_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask2_permutex2var_epi16 -#define _mm256_mask2_permutex2var_epi16(a, idx, k, b) simde_mm256_mask2_permutex2var_epi16(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_permutex2var_epi16 (simde__mmask16 k, simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return 
_mm256_maskz_permutex2var_epi16(k, a, idx, b); - #else - return simde_mm256_maskz_mov_epi16(k, simde_mm256_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutex2var_epi16 -#define _mm256_maskz_permutex2var_epi16(k, a, idx, b) simde_mm256_maskz_permutex2var_epi16(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutex2var_epi32 (simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutex2var_epi32(a, idx, b); - #elif defined(SIMDE_X86_AVX2_NATIVE) - __m256i ta, tb, select; - ta = _mm256_permutevar8x32_epi32(a, idx); - tb = _mm256_permutevar8x32_epi32(b, idx); - select = _mm256_slli_epi32(idx, 28); - return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - idx_ = simde__m256i_to_private(idx), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 2, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((idx_.i32[i] & 8) ? b_ : a_).i32[idx_.i32[i] & 7]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutex2var_epi32 - #define _mm256_permutex2var_epi32(a, idx, b) simde_mm256_permutex2var_epi32(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_permutex2var_epi32 (simde__m256i a, simde__mmask8 k, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutex2var_epi32(a, k, idx, b); - #else - return simde_mm256_mask_mov_epi32(a, k, simde_mm256_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutex2var_epi32 -#define _mm256_mask_permutex2var_epi32(a, k, idx, b) simde_mm256_mask_permutex2var_epi32(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask2_permutex2var_epi32 (simde__m256i a, simde__m256i idx, simde__mmask8 k, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask2_permutex2var_epi32(a, idx, k, b); - #else - return simde_mm256_mask_mov_epi32(idx, k, simde_mm256_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask2_permutex2var_epi32 -#define _mm256_mask2_permutex2var_epi32(a, idx, k, b) simde_mm256_mask2_permutex2var_epi32(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_permutex2var_epi32 (simde__mmask8 k, simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutex2var_epi32(k, a, idx, b); - #else - return simde_mm256_maskz_mov_epi32(k, simde_mm256_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef 
_mm256_maskz_permutex2var_epi32 -#define _mm256_maskz_permutex2var_epi32(k, a, idx, b) simde_mm256_maskz_permutex2var_epi32(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutex2var_epi64 (simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutex2var_epi64(a, idx, b); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - idx_ = simde__m256i_to_private(idx), - b_ = simde__m256i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((idx_.i64[i] & 4) ? b_ : a_).i64[idx_.i64[i] & 3]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutex2var_epi64 - #define _mm256_permutex2var_epi64(a, idx, b) simde_mm256_permutex2var_epi64(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_permutex2var_epi64 (simde__m256i a, simde__mmask8 k, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutex2var_epi64(a, k, idx, b); - #else - return simde_mm256_mask_mov_epi64(a, k, simde_mm256_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutex2var_epi64 -#define _mm256_mask_permutex2var_epi64(a, k, idx, b) simde_mm256_mask_permutex2var_epi64(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask2_permutex2var_epi64 (simde__m256i a, simde__m256i idx, simde__mmask8 k, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask2_permutex2var_epi64(a, idx, k, b); - #else - return simde_mm256_mask_mov_epi64(idx, k, simde_mm256_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask2_permutex2var_epi64 -#define _mm256_mask2_permutex2var_epi64(a, idx, k, b) simde_mm256_mask2_permutex2var_epi64(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutex2var_epi64(k, a, idx, b); - #else - return simde_mm256_maskz_mov_epi64(k, simde_mm256_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutex2var_epi64 -#define _mm256_maskz_permutex2var_epi64(k, a, idx, b) simde_mm256_maskz_permutex2var_epi64(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutex2var_epi8 (simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutex2var_epi8(a, idx, b); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cvtepi16_epi8(_mm512_permutex2var_epi16(_mm512_cvtepu8_epi16(a), _mm512_cvtepu8_epi16(idx), _mm512_cvtepu8_epi16(b))); - #elif defined(SIMDE_X86_AVX2_NATIVE) - __m256i t0, t1, index, select0x10, select0x20, a01, b01; - const __m256i mask = _mm256_set1_epi8(0x3F); - const __m256i a0 = 
_mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - const __m256i a1 = _mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - const __m256i b0 = _mm256_permute4x64_epi64(b, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - const __m256i b1 = _mm256_permute4x64_epi64(b, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - - index = _mm256_and_si256(idx, mask); - t0 = _mm256_shuffle_epi8(a0, index); - t1 = _mm256_shuffle_epi8(a1, index); - select0x10 = _mm256_slli_epi64(index, 3); - a01 = _mm256_blendv_epi8(t0, t1, select0x10); - t0 = _mm256_shuffle_epi8(b0, index); - t1 = _mm256_shuffle_epi8(b1, index); - b01 = _mm256_blendv_epi8(t0, t1, select0x10); - select0x20 = _mm256_slli_epi64(index, 2); - return _mm256_blendv_epi8(a01, b01, select0x20); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - idx_ = simde__m256i_to_private(idx), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 0, 1); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((idx_.i8[i] & 0x20) ? b_ : a_).i8[idx_.i8[i] & 0x1F]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutex2var_epi8 - #define _mm256_permutex2var_epi8(a, idx, b) simde_mm256_permutex2var_epi8(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_permutex2var_epi8 (simde__m256i a, simde__mmask32 k, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutex2var_epi8(a, k, idx, b); - #else - return simde_mm256_mask_mov_epi8(a, k, simde_mm256_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutex2var_epi8 -#define _mm256_mask_permutex2var_epi8(a, k, idx, b) simde_mm256_mask_permutex2var_epi8(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask2_permutex2var_epi8 (simde__m256i a, simde__m256i idx, simde__mmask32 k, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask2_permutex2var_epi8(a, idx, k, b); - #else - return simde_mm256_mask_mov_epi8(idx, k, simde_mm256_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask2_permutex2var_epi8 -#define _mm256_mask2_permutex2var_epi8(a, idx, k, b) simde_mm256_mask2_permutex2var_epi8(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_permutex2var_epi8 (simde__mmask32 k, simde__m256i a, simde__m256i idx, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutex2var_epi8(k, a, idx, b); - #else - return simde_mm256_maskz_mov_epi8(k, simde_mm256_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutex2var_epi8 -#define _mm256_maskz_permutex2var_epi8(k, a, idx, b) simde_mm256_maskz_permutex2var_epi8(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permutex2var_pd 
(simde__m256d a, simde__m256i idx, simde__m256d b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutex2var_pd(a, idx, b); - #else - return simde_mm256_castsi256_pd(simde_mm256_permutex2var_epi64(simde_mm256_castpd_si256(a), idx, simde_mm256_castpd_si256(b))); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutex2var_pd - #define _mm256_permutex2var_pd(a, idx, b) simde_mm256_permutex2var_pd(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_permutex2var_pd (simde__m256d a, simde__mmask8 k, simde__m256i idx, simde__m256d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutex2var_pd(a, k, idx, b); - #else - return simde_mm256_mask_mov_pd(a, k, simde_mm256_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutex2var_pd -#define _mm256_mask_permutex2var_pd(a, k, idx, b) simde_mm256_mask_permutex2var_pd(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask2_permutex2var_pd (simde__m256d a, simde__m256i idx, simde__mmask8 k, simde__m256d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask2_permutex2var_pd(a, idx, k, b); - #else - return simde_mm256_mask_mov_pd(simde_mm256_castsi256_pd(idx), k, simde_mm256_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask2_permutex2var_pd -#define _mm256_mask2_permutex2var_pd(a, idx, k, b) simde_mm256_mask2_permutex2var_pd(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskz_permutex2var_pd (simde__mmask8 k, simde__m256d a, simde__m256i idx, simde__m256d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutex2var_pd(k, a, idx, b); - #else - return simde_mm256_maskz_mov_pd(k, simde_mm256_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutex2var_pd -#define _mm256_maskz_permutex2var_pd(k, a, idx, b) simde_mm256_maskz_permutex2var_pd(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permutex2var_ps (simde__m256 a, simde__m256i idx, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_permutex2var_ps(a, idx, b); - #else - return simde_mm256_castsi256_ps(simde_mm256_permutex2var_epi32(simde_mm256_castps_si256(a), idx, simde_mm256_castps_si256(b))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutex2var_ps - #define _mm256_permutex2var_ps(a, idx, b) simde_mm256_permutex2var_ps(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_permutex2var_ps (simde__m256 a, simde__mmask8 k, simde__m256i idx, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_permutex2var_ps(a, k, idx, b); - #else - return simde_mm256_mask_mov_ps(a, k, simde_mm256_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_permutex2var_ps -#define _mm256_mask_permutex2var_ps(a, k, idx, b) simde_mm256_mask_permutex2var_ps(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask2_permutex2var_ps (simde__m256 a, simde__m256i idx, simde__mmask8 k, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask2_permutex2var_ps(a, idx, k, b); - #else - return simde_mm256_mask_mov_ps(simde_mm256_castsi256_ps(idx), k, simde_mm256_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask2_permutex2var_ps -#define _mm256_mask2_permutex2var_ps(a, idx, k, b) simde_mm256_mask2_permutex2var_ps(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskz_permutex2var_ps (simde__mmask8 k, simde__m256 a, simde__m256i idx, simde__m256 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_permutex2var_ps(k, a, idx, b); - #else - return simde_mm256_maskz_mov_ps(k, simde_mm256_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_permutex2var_ps -#define _mm256_maskz_permutex2var_ps(k, a, idx, b) simde_mm256_maskz_permutex2var_ps(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutex2var_epi16 (simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_permutex2var_epi16(a, idx, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - idx_ = simde__m512i_to_private(idx), - b_ = simde__m512i_to_private(b), - r_; - - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i hilo, hilo1, hilo2, hi, lo, idx1, idx2, ta, tb, select; - const __m256i ones = _mm256_set1_epi16(1); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { - idx1 = idx_.m256i[i]; - idx2 = _mm256_srli_epi32(idx1, 1); - - select = _mm256_slli_epi32(idx2, 27); - ta = _mm256_permutevar8x32_epi32(a_.m256i[0], idx2); - tb = _mm256_permutevar8x32_epi32(b_.m256i[0], idx2); - hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - ta = _mm256_permutevar8x32_epi32(a_.m256i[1], idx2); - tb = _mm256_permutevar8x32_epi32(b_.m256i[1], idx2); - hilo1 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - select = _mm256_add_epi32(select, select); - hilo1 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(hilo), - _mm256_castsi256_ps(hilo1), - _mm256_castsi256_ps(select))); - - idx2 = _mm256_srli_epi32(idx2, 16); - - select = _mm256_slli_epi32(idx2, 27); - ta = _mm256_permutevar8x32_epi32(a_.m256i[0], idx2); - tb = _mm256_permutevar8x32_epi32(b_.m256i[0], idx2); - hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - ta = _mm256_permutevar8x32_epi32(a_.m256i[1], idx2); - tb = _mm256_permutevar8x32_epi32(b_.m256i[1], idx2); - hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), - _mm256_castsi256_ps(tb), - _mm256_castsi256_ps(select))); - select = _mm256_add_epi32(select, select); - hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(hilo), - 
_mm256_castsi256_ps(hilo2), - _mm256_castsi256_ps(select))); - - lo = _mm256_blend_epi16(_mm256_slli_epi32(hilo2, 16), hilo1, 0x55); - hi = _mm256_blend_epi16(hilo2, _mm256_srli_epi32(hilo1, 16), 0x55); - - select = _mm256_cmpeq_epi16(_mm256_and_si256(idx1, ones), ones); - r_.m256i[i] = _mm256_blendv_epi8(lo, hi, select); - } - #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 1, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((idx_.i16[i] & 0x20) ? b_ : a_).i16[idx_.i16[i] & 0x1F]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_epi16 - #define _mm512_permutex2var_epi16(a, idx, b) simde_mm512_permutex2var_epi16(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutex2var_epi16 (simde__m512i a, simde__mmask32 k, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_permutex2var_epi16(a, k, idx, b); - #else - return simde_mm512_mask_mov_epi16(a, k, simde_mm512_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_epi16 -#define _mm512_mask_permutex2var_epi16(a, k, idx, b) simde_mm512_mask_permutex2var_epi16(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask2_permutex2var_epi16 (simde__m512i a, simde__m512i idx, simde__mmask32 k, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask2_permutex2var_epi16(a, idx, k, b); - #else - return simde_mm512_mask_mov_epi16(idx, k, simde_mm512_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_epi16 -#define _mm512_mask2_permutex2var_epi16(a, idx, k, b) simde_mm512_mask2_permutex2var_epi16(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutex2var_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_permutex2var_epi16(k, a, idx, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_permutex2var_epi16(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_epi16 -#define _mm512_maskz_permutex2var_epi16(k, a, idx, b) simde_mm512_maskz_permutex2var_epi16(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutex2var_epi32(a, idx, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - idx_ = simde__m512i_to_private(idx), - b_ = simde__m512i_to_private(b), - r_; - - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i index, t0, t1, a01, b01, select; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { - index = idx_.m256i[i]; - t0 = _mm256_permutevar8x32_epi32(a_.m256i[0], index); - t1 = _mm256_permutevar8x32_epi32(a_.m256i[1], index); - select = _mm256_slli_epi32(index, 28); - a01 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(t0), - _mm256_castsi256_ps(t1), - _mm256_castsi256_ps(select))); - t0 = _mm256_permutevar8x32_epi32(b_.m256i[0], index); - t1 = 
_mm256_permutevar8x32_epi32(b_.m256i[1], index); - b01 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(t0), - _mm256_castsi256_ps(t1), - _mm256_castsi256_ps(select))); - select = _mm256_slli_epi32(index, 27); - r_.m256i[i] = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a01), - _mm256_castsi256_ps(b01), - _mm256_castsi256_ps(select))); - } - #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 2, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((idx_.i32[i] & 0x10) ? b_ : a_).i32[idx_.i32[i] & 0x0F]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_epi32 - #define _mm512_permutex2var_epi32(a, idx, b) simde_mm512_permutex2var_epi32(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutex2var_epi32 (simde__m512i a, simde__mmask16 k, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutex2var_epi32(a, k, idx, b); - #else - return simde_mm512_mask_mov_epi32(a, k, simde_mm512_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_epi32 -#define _mm512_mask_permutex2var_epi32(a, k, idx, b) simde_mm512_mask_permutex2var_epi32(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask2_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__mmask16 k, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask2_permutex2var_epi32(a, idx, k, b); - #else - return simde_mm512_mask_mov_epi32(idx, k, simde_mm512_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_epi32 -#define _mm512_mask2_permutex2var_epi32(a, idx, k, b) simde_mm512_mask2_permutex2var_epi32(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutex2var_epi32 (simde__mmask16 k, simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutex2var_epi32(k, a, idx, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_epi32 -#define _mm512_maskz_permutex2var_epi32(k, a, idx, b) simde_mm512_maskz_permutex2var_epi32(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutex2var_epi64 (simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutex2var_epi64(a, idx, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - idx_ = simde__m512i_to_private(idx), - b_ = simde__m512i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((idx_.i64[i] & 8) ? 
b_ : a_).i64[idx_.i64[i] & 7]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_epi64 - #define _mm512_permutex2var_epi64(a, idx, b) simde_mm512_permutex2var_epi64(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutex2var_epi64 (simde__m512i a, simde__mmask8 k, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutex2var_epi64(a, k, idx, b); - #else - return simde_mm512_mask_mov_epi64(a, k, simde_mm512_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_epi64 -#define _mm512_mask_permutex2var_epi64(a, k, idx, b) simde_mm512_mask_permutex2var_epi64(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask2_permutex2var_epi64 (simde__m512i a, simde__m512i idx, simde__mmask8 k, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask2_permutex2var_epi64(a, idx, k, b); - #else - return simde_mm512_mask_mov_epi64(idx, k, simde_mm512_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_epi64 -#define _mm512_mask2_permutex2var_epi64(a, idx, k, b) simde_mm512_mask2_permutex2var_epi64(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutex2var_epi64(k, a, idx, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_epi64 -#define _mm512_maskz_permutex2var_epi64(k, a, idx, b) simde_mm512_maskz_permutex2var_epi64(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutex2var_epi8 (simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_permutex2var_epi8(a, idx, b); - #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - __m512i hilo, hi, lo, hi2, lo2, idx2; - const __m512i ones = _mm512_set1_epi8(1); - const __m512i low_bytes = _mm512_set1_epi16(0x00FF); - - idx2 = _mm512_srli_epi16(idx, 1); - hilo = _mm512_permutex2var_epi16(a, idx2, b); - __mmask64 mask = _mm512_test_epi8_mask(idx, ones); - lo = _mm512_and_si512(hilo, low_bytes); - hi = _mm512_srli_epi16(hilo, 8); - - idx2 = _mm512_srli_epi16(idx, 9); - hilo = _mm512_permutex2var_epi16(a, idx2, b); - lo2 = _mm512_slli_epi16(hilo, 8); - hi2 = _mm512_andnot_si512(low_bytes, hilo); - - lo = _mm512_or_si512(lo, lo2); - hi = _mm512_or_si512(hi, hi2); - - return _mm512_mask_blend_epi8(mask, lo, hi); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - idx_ = simde__m512i_to_private(idx), - b_ = simde__m512i_to_private(b), - r_; - - #if defined(SIMDE_X86_AVX2_NATIVE) - __m256i t0, t1, index, select0x10, select0x20, select0x40, t01, t23, a0123, b0123; - const __m256i mask = _mm256_set1_epi8(0x7F); - const __m256i a0 = _mm256_permute4x64_epi64(a_.m256i[0], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - const __m256i a1 = _mm256_permute4x64_epi64(a_.m256i[0], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - const __m256i a2 = _mm256_permute4x64_epi64(a_.m256i[1], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - const __m256i a3 = 
_mm256_permute4x64_epi64(a_.m256i[1], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - const __m256i b0 = _mm256_permute4x64_epi64(b_.m256i[0], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - const __m256i b1 = _mm256_permute4x64_epi64(b_.m256i[0], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - const __m256i b2 = _mm256_permute4x64_epi64(b_.m256i[1], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); - const __m256i b3 = _mm256_permute4x64_epi64(b_.m256i[1], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { - index = _mm256_and_si256(idx_.m256i[i], mask); - t0 = _mm256_shuffle_epi8(a0, index); - t1 = _mm256_shuffle_epi8(a1, index); - select0x10 = _mm256_slli_epi64(index, 3); - t01 = _mm256_blendv_epi8(t0, t1, select0x10); - t0 = _mm256_shuffle_epi8(a2, index); - t1 = _mm256_shuffle_epi8(a3, index); - t23 = _mm256_blendv_epi8(t0, t1, select0x10); - select0x20 = _mm256_slli_epi64(index, 2); - a0123 = _mm256_blendv_epi8(t01, t23, select0x20); - t0 = _mm256_shuffle_epi8(b0, index); - t1 = _mm256_shuffle_epi8(b1, index); - t01 = _mm256_blendv_epi8(t0, t1, select0x10); - t0 = _mm256_shuffle_epi8(b2, index); - t1 = _mm256_shuffle_epi8(b3, index); - t23 = _mm256_blendv_epi8(t0, t1, select0x10); - b0123 = _mm256_blendv_epi8(t01, t23, select0x20); - select0x40 = _mm256_slli_epi64(index, 1); - r_.m256i[i] = _mm256_blendv_epi8(a0123, b0123, select0x40); - } - #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) - simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((idx_.i8[i] & 0x40) ? b_ : a_).i8[idx_.i8[i] & 0x3F]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_epi8 - #define _mm512_permutex2var_epi8(a, idx, b) simde_mm512_permutex2var_epi8(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutex2var_epi8 (simde__m512i a, simde__mmask64 k, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_mask_permutex2var_epi8(a, k, idx, b); - #else - return simde_mm512_mask_mov_epi8(a, k, simde_mm512_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_epi8 -#define _mm512_mask_permutex2var_epi8(a, k, idx, b) simde_mm512_mask_permutex2var_epi8(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask2_permutex2var_epi8 (simde__m512i a, simde__m512i idx, simde__mmask64 k, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_mask2_permutex2var_epi8(a, idx, k, b); - #else - return simde_mm512_mask_mov_epi8(idx, k, simde_mm512_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_epi8 -#define _mm512_mask2_permutex2var_epi8(a, idx, k, b) simde_mm512_mask2_permutex2var_epi8(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutex2var_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512VBMI_NATIVE) - return _mm512_maskz_permutex2var_epi8(k, a, idx, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_permutex2var_epi8(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) - #undef 
_mm512_maskz_permutex2var_epi8 -#define _mm512_maskz_permutex2var_epi8(k, a, idx, b) simde_mm512_maskz_permutex2var_epi8(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_permutex2var_pd (simde__m512d a, simde__m512i idx, simde__m512d b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_permutex2var_pd(a, idx, b); - #else - return simde_mm512_castsi512_pd(simde_mm512_permutex2var_epi64(simde_mm512_castpd_si512(a), idx, simde_mm512_castpd_si512(b))); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_pd - #define _mm512_permutex2var_pd(a, idx, b) simde_mm512_permutex2var_pd(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_permutex2var_pd (simde__m512d a, simde__mmask8 k, simde__m512i idx, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutex2var_pd(a, k, idx, b); - #else - return simde_mm512_mask_mov_pd(a, k, simde_mm512_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_pd -#define _mm512_mask_permutex2var_pd(a, k, idx, b) simde_mm512_mask_permutex2var_pd(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask2_permutex2var_pd (simde__m512d a, simde__m512i idx, simde__mmask8 k, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask2_permutex2var_pd(a, idx, k, b); - #else - return simde_mm512_mask_mov_pd(simde_mm512_castsi512_pd(idx), k, simde_mm512_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_pd -#define _mm512_mask2_permutex2var_pd(a, idx, k, b) simde_mm512_mask2_permutex2var_pd(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_permutex2var_pd (simde__mmask8 k, simde__m512d a, simde__m512i idx, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutex2var_pd(k, a, idx, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_pd -#define _mm512_maskz_permutex2var_pd(k, a, idx, b) simde_mm512_maskz_permutex2var_pd(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutex2var_ps(a, idx, b); - #else - return simde_mm512_castsi512_ps(simde_mm512_permutex2var_epi32(simde_mm512_castps_si512(a), idx, simde_mm512_castps_si512(b))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_ps - #define _mm512_permutex2var_ps(a, idx, b) simde_mm512_permutex2var_ps(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_permutex2var_ps (simde__m512 a, simde__mmask16 k, simde__m512i idx, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutex2var_ps(a, k, idx, b); - #else - return simde_mm512_mask_mov_ps(a, k, simde_mm512_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_ps -#define _mm512_mask_permutex2var_ps(a, k, idx, b) simde_mm512_mask_permutex2var_ps(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask2_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__mmask16 k, simde__m512 b) { - #if 
defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask2_permutex2var_ps(a, idx, k, b); - #else - return simde_mm512_mask_mov_ps(simde_mm512_castsi512_ps(idx), k, simde_mm512_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_ps -#define _mm512_mask2_permutex2var_ps(a, idx, k, b) simde_mm512_mask2_permutex2var_ps(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_permutex2var_ps (simde__mmask16 k, simde__m512 a, simde__m512i idx, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutex2var_ps(k, a, idx, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_ps -#define _mm512_maskz_permutex2var_ps(k, a, idx, b) simde_mm512_maskz_permutex2var_ps(k, a, idx, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_PERMUTEX2VAR_H) */ -/* :: End simde/x86/avx512/permutex2var.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/shuffle.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Christopher Moore - * 2023 Michael R. 
Crusoe - */ - -#if !defined(SIMDE_X86_AVX512_SHUFFLE_H) -#define SIMDE_X86_AVX512_SHUFFLE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_shuffle_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_shuffle_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_shuffle_epi8(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] & 0x80) ? 0 : a_.i8[(b_.i8[i] & 0x0f) + (i & 0x30)]; - } - #endif - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_shuffle_epi8 - #define _mm512_shuffle_epi8(a, b) simde_mm512_shuffle_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_shuffle_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_shuffle_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_shuffle_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_shuffle_epi8 - #define _mm512_mask_shuffle_epi8(src, k, a, b) simde_mm512_mask_shuffle_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_shuffle_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_shuffle_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_shuffle_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_shuffle_epi8 - #define _mm512_maskz_shuffle_epi8(k, a, b) simde_mm512_maskz_shuffle_epi8(k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) -# define simde_mm512_shuffle_epi32(a, imm8) _mm512_shuffle_epi32((a), (imm8)) -#elif defined(SIMDE_STATEMENT_EXPR_) -# define simde_mm512_shuffle_epi32(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512i_private simde_mm512_shuffle_epi32_r_, \ - simde_mm512_shuffle_epi32_a_ = simde__m512i_to_private((a)); \ - simde_mm512_shuffle_epi32_r_.m128i[0] = simde_mm_shuffle_epi32( \ - simde_mm512_shuffle_epi32_a_.m128i[0], (imm8)); \ - simde_mm512_shuffle_epi32_r_.m128i[1] = simde_mm_shuffle_epi32( \ - simde_mm512_shuffle_epi32_a_.m128i[1], (imm8)); \ - simde_mm512_shuffle_epi32_r_.m128i[2] = simde_mm_shuffle_epi32( \ - simde_mm512_shuffle_epi32_a_.m128i[2], (imm8)); \ - simde_mm512_shuffle_epi32_r_.m128i[3] = simde_mm_shuffle_epi32( \ - simde_mm512_shuffle_epi32_a_.m128i[3], (imm8)); \ - simde__m512i_from_private(simde_mm512_shuffle_epi32_r_); \ - })) -#else -# define simde_mm512_shuffle_epi32(a, imm8) \ - simde_x_mm512_set_m128i( \ - simde_mm_shuffle_epi32(simde_mm512_extracti32x4_epi32(a, 3), (imm8)), \ - 
simde_mm_shuffle_epi32(simde_mm512_extracti32x4_epi32(a, 2), (imm8)), \ - simde_mm_shuffle_epi32(simde_mm512_extracti32x4_epi32(a, 1), (imm8)), \ - simde_mm_shuffle_epi32(simde_mm512_extracti32x4_epi32(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_shuffle_epi32 - #define _mm512_shuffle_epi32(a, imm8) simde_mm512_shuffle_epi32((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_shuffle_i32x4 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - r_.m128i[0] = a_.m128i[ imm8 & 1]; - r_.m128i[1] = b_.m128i[(imm8 >> 1) & 1]; - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_shuffle_i32x4(a, b, imm8) _mm256_shuffle_i32x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_i32x4 - #define _mm256_shuffle_i32x4(a, b, imm8) simde_mm256_shuffle_i32x4(a, b, imm8) -#endif - -#define simde_mm256_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm256_maskz_mov_epi32(k, simde_mm256_shuffle_i32x4(a, b, imm8)) -#define simde_mm256_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm256_mask_mov_epi32(src, k, simde_mm256_shuffle_i32x4(a, b, imm8)) - -#define simde_mm256_shuffle_f32x4(a, b, imm8) simde_mm256_castsi256_ps(simde_mm256_shuffle_i32x4(simde_mm256_castps_si256(a), simde_mm256_castps_si256(b), imm8)) -#define simde_mm256_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm256_maskz_mov_ps(k, simde_mm256_shuffle_f32x4(a, b, imm8)) -#define simde_mm256_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm256_mask_mov_ps(src, k, simde_mm256_shuffle_f32x4(a, b, imm8)) - -#define simde_mm256_shuffle_i64x2(a, b, imm8) simde_mm256_shuffle_i32x4(a, b, imm8) -#define simde_mm256_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm256_shuffle_i64x2(a, b, imm8)) -#define simde_mm256_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm256_shuffle_i64x2(a, b, imm8)) - -#define simde_mm256_shuffle_f64x2(a, b, imm8) simde_mm256_castsi256_pd(simde_mm256_shuffle_i64x2(simde_mm256_castpd_si256(a), simde_mm256_castpd_si256(b), imm8)) -#define simde_mm256_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm256_maskz_mov_pd(k, simde_mm256_shuffle_f64x2(a, b, imm8)) -#define simde_mm256_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm256_shuffle_f64x2(a, b, imm8)) - -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_shuffle_i32x4 - #undef _mm256_mask_shuffle_i32x4 - #define _mm256_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm256_maskz_shuffle_i32x4(k, a, b, imm8) - #define _mm256_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm256_mask_shuffle_i32x4(src, k, a, b, imm8) - - #undef _mm256_shuffle_f32x4 - #undef _mm256_maskz_shuffle_f32x4 - #undef _mm256_mask_shuffle_f32x4 - #define _mm256_shuffle_f32x4(a, b, imm8) simde_mm256_shuffle_f32x4(a, b, imm8) - #define _mm256_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm256_maskz_shuffle_f32x4(k, a, b, imm8) - #define _mm256_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm256_mask_shuffle_f32x4(src, k, a, b, imm8) - - #undef _mm256_shuffle_i64x2 - #undef _mm256_maskz_shuffle_i64x2 - #undef _mm256_mask_shuffle_i64x2 - #define _mm256_shuffle_i64x2(a, b, imm8) 
simde_mm256_shuffle_i64x2(a, b, imm8) - #define _mm256_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm256_maskz_shuffle_i64x2(k, a, b, imm8) - #define _mm256_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm256_mask_shuffle_i64x2(src, k, a, b, imm8) - - #undef _mm256_shuffle_f64x2 - #undef _mm256_maskz_shuffle_f64x2 - #undef _mm256_mask_shuffle_f64x2 - #define _mm256_shuffle_f64x2(a, b, imm8) simde_mm256_shuffle_f64x2(a, b, imm8) - #define _mm256_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm256_maskz_shuffle_f64x2(k, a, b, imm8) - #define _mm256_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm256_mask_shuffle_f64x2(src, k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_shuffle_i32x4 (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - r_.m128i[0] = a_.m128i[ imm8 & 3]; - r_.m128i[1] = a_.m128i[(imm8 >> 2) & 3]; - r_.m128i[2] = b_.m128i[(imm8 >> 4) & 3]; - r_.m128i[3] = b_.m128i[(imm8 >> 6) & 3]; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_shuffle_i32x4(a, b, imm8) _mm512_shuffle_i32x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_shuffle_i32x4 - #define _mm512_shuffle_i32x4(a, b, imm8) simde_mm512_shuffle_i32x4(a, b, imm8) -#endif - -#define simde_mm512_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm512_maskz_mov_epi32(k, simde_mm512_shuffle_i32x4(a, b, imm8)) -#define simde_mm512_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm512_mask_mov_epi32(src, k, simde_mm512_shuffle_i32x4(a, b, imm8)) - -#define simde_mm512_shuffle_f32x4(a, b, imm8) simde_mm512_castsi512_ps(simde_mm512_shuffle_i32x4(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b), imm8)) -#define simde_mm512_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm512_maskz_mov_ps(k, simde_mm512_shuffle_f32x4(a, b, imm8)) -#define simde_mm512_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm512_mask_mov_ps(src, k, simde_mm512_shuffle_f32x4(a, b, imm8)) - -#define simde_mm512_shuffle_i64x2(a, b, imm8) simde_mm512_shuffle_i32x4(a, b, imm8) -#define simde_mm512_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm512_maskz_mov_epi64(k, simde_mm512_shuffle_i64x2(a, b, imm8)) -#define simde_mm512_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm512_mask_mov_epi64(src, k, simde_mm512_shuffle_i64x2(a, b, imm8)) - -#define simde_mm512_shuffle_f64x2(a, b, imm8) simde_mm512_castsi512_pd(simde_mm512_shuffle_i64x2(simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b), imm8)) -#define simde_mm512_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_shuffle_f64x2(a, b, imm8)) -#define simde_mm512_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm512_mask_mov_pd(src, k, simde_mm512_shuffle_f64x2(a, b, imm8)) - -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_shuffle_i32x4 - #undef _mm512_mask_shuffle_i32x4 - #define _mm512_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm512_maskz_shuffle_i32x4(k, a, b, imm8) - #define _mm512_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm512_mask_shuffle_i32x4(src, k, a, b, imm8) - - #undef _mm512_shuffle_f32x4 - #undef _mm512_maskz_shuffle_f32x4 - #undef _mm512_mask_shuffle_f32x4 - #define _mm512_shuffle_f32x4(a, b, imm8) simde_mm512_shuffle_f32x4(a, b, imm8) - #define _mm512_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm512_maskz_shuffle_f32x4(k, a, b, imm8) - #define _mm512_mask_shuffle_f32x4(src, k, a, b, imm8) 
simde_mm512_mask_shuffle_f32x4(src, k, a, b, imm8) - - #undef _mm512_shuffle_i64x2 - #undef _mm512_maskz_shuffle_i64x2 - #undef _mm512_mask_shuffle_i64x2 - #define _mm512_shuffle_i64x2(a, b, imm8) simde_mm512_shuffle_i64x2(a, b, imm8) - #define _mm512_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm512_maskz_shuffle_i64x2(k, a, b, imm8) - #define _mm512_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm512_mask_shuffle_i64x2(src, k, a, b, imm8) - - #undef _mm512_shuffle_f64x2 - #undef _mm512_maskz_shuffle_f64x2 - #undef _mm512_mask_shuffle_f64x2 - #define _mm512_shuffle_f64x2(a, b, imm8) simde_mm512_shuffle_f64x2(a, b, imm8) - #define _mm512_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm512_maskz_shuffle_f64x2(k, a, b, imm8) - #define _mm512_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm512_mask_shuffle_f64x2(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_shuffle_ps(a, b, imm8) _mm512_shuffle_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_shuffle_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512_private \ - simde_mm512_shuffle_ps_a_ = simde__m512_to_private(a), \ - simde_mm512_shuffle_ps_b_ = simde__m512_to_private(b); \ - \ - simde_mm512_shuffle_ps_a_.m256[0] = simde_mm256_shuffle_ps(simde_mm512_shuffle_ps_a_.m256[0], simde_mm512_shuffle_ps_b_.m256[0], imm8); \ - simde_mm512_shuffle_ps_a_.m256[1] = simde_mm256_shuffle_ps(simde_mm512_shuffle_ps_a_.m256[1], simde_mm512_shuffle_ps_b_.m256[1], imm8); \ - \ - simde__m512_from_private(simde_mm512_shuffle_ps_a_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm512_shuffle_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512_private \ - simde_mm512_shuffle_ps_a_ = simde__m512_to_private(a), \ - simde_mm512_shuffle_ps_b_ = simde__m512_to_private(b); \ - \ - simde_mm512_shuffle_ps_a_.f32 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 32, 64, \ - simde_mm512_shuffle_ps_a_.f32, \ - simde_mm512_shuffle_ps_b_.f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 16, \ - (((imm8) >> 6) & 3) + 16, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 20, \ - (((imm8) >> 6) & 3) + 20, \ - (((imm8) ) & 3) + 8, \ - (((imm8) >> 2) & 3) + 8, \ - (((imm8) >> 4) & 3) + 24, \ - (((imm8) >> 6) & 3) + 24, \ - (((imm8) ) & 3) + 12, \ - (((imm8) >> 2) & 3) + 12, \ - (((imm8) >> 4) & 3) + 28, \ - (((imm8) >> 6) & 3) + 28 \ - ); \ - \ - simde__m512_from_private(simde_mm512_shuffle_ps_a_); \ - })) -#else - SIMDE_FUNCTION_ATTRIBUTES - simde__m512 - simde_mm512_shuffle_ps(simde__m512 a, simde__m512 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - const size_t halfway = (sizeof(r_.m128_private[0].f32) / sizeof(r_.m128_private[0].f32[0]) / 2); - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - SIMDE_VECTORIZE - for (size_t j = 0 ; j < halfway ; j++) { - r_.m128_private[i].f32[j] = a_.m128_private[i].f32[(imm8 >> (j * 2)) & 3]; - r_.m128_private[i].f32[halfway + j] = b_.m128_private[i].f32[(imm8 >> ((halfway + j) * 2)) & 3]; - } - } - - return simde__m512_from_private(r_); - } -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_shuffle_ps - #define _mm512_shuffle_ps(a, b, imm8) simde_mm512_shuffle_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_shuffle_pd(simde__m512d a, 
simde__m512d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_.f64) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[i * 2] = (imm8 & ( 1 << (i*2) )) ? a_.f64[i * 2 + 1]: a_.f64[i * 2]; - r_.f64[i * 2 + 1] = (imm8 & ( 1 << (i*2+1) )) ? b_.f64[i * 2 + 1]: b_.f64[i * 2]; - } - - return simde__m512d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_shuffle_pd(a, b, imm8) _mm512_shuffle_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_shuffle_pd - #define _mm512_shuffle_pd(a, b, imm8) simde_mm512_shuffle_pd(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512BW_NATIVE) -# define simde_mm512_shufflehi_epi16(a, imm8) _mm512_shufflehi_epi16(a, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) -# define simde_mm512_shufflehi_epi16(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512i_private simde_mm512_shufflehi_epi16_r_, \ - simde_mm512_shufflehi_epi16_a_ = simde__m512i_to_private((a)); \ - simde_mm512_shufflehi_epi16_r_.m128i[0] = simde_mm_shufflehi_epi16( \ - simde_mm512_shufflehi_epi16_a_.m128i[0], (imm8)); \ - simde_mm512_shufflehi_epi16_r_.m128i[1] = simde_mm_shufflehi_epi16( \ - simde_mm512_shufflehi_epi16_a_.m128i[1], (imm8)); \ - simde_mm512_shufflehi_epi16_r_.m128i[2] = simde_mm_shufflehi_epi16( \ - simde_mm512_shufflehi_epi16_a_.m128i[2], (imm8)); \ - simde_mm512_shufflehi_epi16_r_.m128i[3] = simde_mm_shufflehi_epi16( \ - simde_mm512_shufflehi_epi16_a_.m128i[3], (imm8)); \ - simde__m512i_from_private(simde_mm512_shufflehi_epi16_r_); \ - })) -#else -# define simde_mm512_shufflehi_epi16(a, imm8) \ - simde_x_mm512_set_m128i( \ - simde_mm_shufflehi_epi16(simde_mm512_extracti32x4_epi32((a), 3), (imm8)), \ - simde_mm_shufflehi_epi16(simde_mm512_extracti32x4_epi32((a), 2), (imm8)), \ - simde_mm_shufflehi_epi16(simde_mm512_extracti32x4_epi32((a), 1), (imm8)), \ - simde_mm_shufflehi_epi16(simde_mm512_extracti32x4_epi32((a), 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_shufflehi_epi16 - #define _mm512_shufflehi_epi16(a, imm8) simde_mm512_shufflehi_epi16(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512BW_NATIVE) -# define simde_mm512_shufflelo_epi16(a, imm8) _mm512_shufflelo_epi16(a, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) -# define simde_mm512_shufflelo_epi16(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512i_private simde_mm512_shufflelo_epi16_r_, \ - simde_mm512_shufflelo_epi16_a_ = simde__m512i_to_private((a)); \ - simde_mm512_shufflelo_epi16_r_.m128i[0] = simde_mm_shufflelo_epi16( \ - simde_mm512_shufflelo_epi16_a_.m128i[0], (imm8)); \ - simde_mm512_shufflelo_epi16_r_.m128i[1] = simde_mm_shufflelo_epi16( \ - simde_mm512_shufflelo_epi16_a_.m128i[1], (imm8)); \ - simde_mm512_shufflelo_epi16_r_.m128i[2] = simde_mm_shufflelo_epi16( \ - simde_mm512_shufflelo_epi16_a_.m128i[2], (imm8)); \ - simde_mm512_shufflelo_epi16_r_.m128i[3] = simde_mm_shufflelo_epi16( \ - simde_mm512_shufflelo_epi16_a_.m128i[3], (imm8)); \ - simde__m512i_from_private(simde_mm512_shufflelo_epi16_r_); \ - })) -#else -# define simde_mm512_shufflelo_epi16(a, imm8) \ - simde_x_mm512_set_m128i( \ - simde_mm_shufflelo_epi16(simde_mm512_extracti32x4_epi32((a), 3), (imm8)), \ - simde_mm_shufflelo_epi16(simde_mm512_extracti32x4_epi32((a), 2), (imm8)), \ - simde_mm_shufflelo_epi16(simde_mm512_extracti32x4_epi32((a), 1), (imm8)), \ - 
simde_mm_shufflelo_epi16(simde_mm512_extracti32x4_epi32((a), 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_shufflelo_epi16 - #define _mm512_shufflelo_epi16(a, imm8) simde_mm512_shufflelo_epi16(a, imm8) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SHUFFLE_H) */ -/* :: End simde/x86/avx512/shuffle.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/xor.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_XOR_H) -#define SIMDE_X86_AVX512_XOR_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_xor_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_xor_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - /* TODO: generate reduced case to give to Intel */ - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && !defined(HEDLEY_INTEL_VERSION) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_xor_ps(a_.m256[i], b_.m256[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_xor_ps - #define _mm512_xor_ps(a, b) simde_mm512_xor_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_xor_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_xor_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_xor_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_xor_ps - #define _mm512_mask_xor_ps(src, k, a, b) simde_mm512_mask_xor_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_xor_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_xor_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_xor_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_xor_ps - #define _mm512_maskz_xor_ps(k, a, b) simde_mm512_maskz_xor_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_xor_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_xor_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256d[0] = simde_mm256_xor_pd(a_.m256d[0], b_.m256d[0]); - r_.m256d[1] = simde_mm256_xor_pd(a_.m256d[1], b_.m256d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_xor_pd - #define _mm512_xor_pd(a, b) simde_mm512_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_xor_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if 
defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_xor_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_xor_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_xor_pd - #define _mm512_mask_xor_pd(src, k, a, b) simde_mm512_mask_xor_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_xor_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_xor_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_xor_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_xor_pd - #define _mm512_maskz_xor_pd(k, a, b) simde_mm512_maskz_xor_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_xor_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_xor_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_xor_si256(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_xor_epi32 - #define _mm512_xor_epi32(a, b) simde_mm512_xor_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_xor_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_xor_epi32(src, k, v2, v3); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_xor_epi32(v2, v3)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_xor_epi32 - #define _mm512_mask_xor_epi32(src, k, v2, v3) simde_mm512_mask_xor_epi32(src, k, v2, v3) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_xor_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_xor_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_xor_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_xor_epi32 - #define _mm512_maskz_xor_epi32(k, a, b) simde_mm512_maskz_xor_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_xor_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_xor_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_xor_si256(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_xor_epi64 - #define 
_mm512_xor_epi64(a, b) simde_mm512_xor_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_xor_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_xor_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_xor_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_xor_epi64 - #define _mm512_mask_xor_epi64(src, k, a, b) simde_mm512_mask_xor_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_xor_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_xor_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_xor_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_xor_epi64 - #define _mm512_maskz_xor_epi64(k, a, b) simde_mm512_maskz_xor_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_xor_si512 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_xor_si512(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_xor_si256(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_xor_si256(a_.m256i[1], b_.m256i[1]); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); - r_.m128i[2] = simde_mm_xor_si128(a_.m128i[2], b_.m128i[2]); - r_.m128i[3] = simde_mm_xor_si128(a_.m128i[3], b_.m128i[3]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_xor_si512 - #define _mm512_xor_si512(a, b) simde_mm512_xor_si512(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_XOR_H) */ -/* :: End simde/x86/avx512/xor.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* In all the *gf2p8affine* intrinsics the argument b must be a compile-time constant so we must use macros and simde_x_mm* helper functions */ - -/* N.B. 
The _mm*gf2p8affineinv_epi64_epi8 and _mm*gf2p8mul_epi8 intrinsics are for a Field Generator Polynomial (FGP) (aka reduction polynomial) of 0x11B */ -/* Only the _mm*gf2p8affine_epi64_epi8 intrinsics do not assume this specific FGP */ - -/* The field generator polynomial is 0x11B but we make the 0x100 bit implicit to fit inside 8 bits */ -#define SIMDE_X86_GFNI_FGP 0x1B - -/* Computing the inverse of a GF element is expensive so use this LUT for an FGP of 0x11B */ - -static const union { - uint8_t u8[256]; - simde__m128i m128i[16]; -} simde_x_gf2p8inverse_lut = { - { - 0x00, 0x01, 0x8d, 0xf6, 0xcb, 0x52, 0x7b, 0xd1, 0xe8, 0x4f, 0x29, 0xc0, 0xb0, 0xe1, 0xe5, 0xc7, - 0x74, 0xb4, 0xaa, 0x4b, 0x99, 0x2b, 0x60, 0x5f, 0x58, 0x3f, 0xfd, 0xcc, 0xff, 0x40, 0xee, 0xb2, - 0x3a, 0x6e, 0x5a, 0xf1, 0x55, 0x4d, 0xa8, 0xc9, 0xc1, 0x0a, 0x98, 0x15, 0x30, 0x44, 0xa2, 0xc2, - 0x2c, 0x45, 0x92, 0x6c, 0xf3, 0x39, 0x66, 0x42, 0xf2, 0x35, 0x20, 0x6f, 0x77, 0xbb, 0x59, 0x19, - 0x1d, 0xfe, 0x37, 0x67, 0x2d, 0x31, 0xf5, 0x69, 0xa7, 0x64, 0xab, 0x13, 0x54, 0x25, 0xe9, 0x09, - 0xed, 0x5c, 0x05, 0xca, 0x4c, 0x24, 0x87, 0xbf, 0x18, 0x3e, 0x22, 0xf0, 0x51, 0xec, 0x61, 0x17, - 0x16, 0x5e, 0xaf, 0xd3, 0x49, 0xa6, 0x36, 0x43, 0xf4, 0x47, 0x91, 0xdf, 0x33, 0x93, 0x21, 0x3b, - 0x79, 0xb7, 0x97, 0x85, 0x10, 0xb5, 0xba, 0x3c, 0xb6, 0x70, 0xd0, 0x06, 0xa1, 0xfa, 0x81, 0x82, - 0x83, 0x7e, 0x7f, 0x80, 0x96, 0x73, 0xbe, 0x56, 0x9b, 0x9e, 0x95, 0xd9, 0xf7, 0x02, 0xb9, 0xa4, - 0xde, 0x6a, 0x32, 0x6d, 0xd8, 0x8a, 0x84, 0x72, 0x2a, 0x14, 0x9f, 0x88, 0xf9, 0xdc, 0x89, 0x9a, - 0xfb, 0x7c, 0x2e, 0xc3, 0x8f, 0xb8, 0x65, 0x48, 0x26, 0xc8, 0x12, 0x4a, 0xce, 0xe7, 0xd2, 0x62, - 0x0c, 0xe0, 0x1f, 0xef, 0x11, 0x75, 0x78, 0x71, 0xa5, 0x8e, 0x76, 0x3d, 0xbd, 0xbc, 0x86, 0x57, - 0x0b, 0x28, 0x2f, 0xa3, 0xda, 0xd4, 0xe4, 0x0f, 0xa9, 0x27, 0x53, 0x04, 0x1b, 0xfc, 0xac, 0xe6, - 0x7a, 0x07, 0xae, 0x63, 0xc5, 0xdb, 0xe2, 0xea, 0x94, 0x8b, 0xc4, 0xd5, 0x9d, 0xf8, 0x90, 0x6b, - 0xb1, 0x0d, 0xd6, 0xeb, 0xc6, 0x0e, 0xcf, 0xad, 0x08, 0x4e, 0xd7, 0xe3, 0x5d, 0x50, 0x1e, 0xb3, - 0x5b, 0x23, 0x38, 0x34, 0x68, 0x46, 0x03, 0x8c, 0xdd, 0x9c, 0x7d, 0xa0, 0xcd, 0x1a, 0x41, 0x1c - } -}; - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_gf2p8matrix_multiply_epi64_epi8 (simde__m128i x, simde__m128i A) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - const __m128i byte_select = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); - const __m128i zero = _mm_setzero_si128(); - __m128i r, a, p, X; - - a = _mm_shuffle_epi8(A, _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8)); - X = x; - r = zero; - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 0 ; i < 8 ; i++) { - p = _mm_insert_epi16(zero, _mm_movemask_epi8(a), 0); - p = _mm_shuffle_epi8(p, byte_select); - p = _mm_and_si128(p, _mm_cmpgt_epi8(zero, X)); - r = _mm_xor_si128(r, p); - a = _mm_add_epi8(a, a); - X = _mm_add_epi8(X, X); - } - - return r; - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i zero = _mm_setzero_si128(); - __m128i r, a, p, X; - - a = _mm_shufflehi_epi16(A, (0 << 6) + (1 << 4) + (2 << 2) + (3 << 0)); - a = _mm_shufflelo_epi16(a, (0 << 6) + (1 << 4) + (2 << 2) + (3 << 0)); - a = _mm_or_si128(_mm_slli_epi16(a, 8), _mm_srli_epi16(a, 8)); - X = _mm_unpacklo_epi8(x, _mm_unpackhi_epi64(x, x)); - r = zero; - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 0 ; i < 8 ; i++) { - p = _mm_set1_epi16(HEDLEY_STATIC_CAST(short, _mm_movemask_epi8(a))); - p = _mm_and_si128(p, _mm_cmpgt_epi8(zero, X)); - r = _mm_xor_si128(r, p); - a 
= _mm_add_epi8(a, a); - X = _mm_add_epi8(X, X); - } - - return _mm_packus_epi16(_mm_srli_epi16(_mm_slli_epi16(r, 8), 8), _mm_srli_epi16(r, 8)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const uint8_t byte_interleave[16] = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}; - static const uint8_t byte_deinterleave[16] = {0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15}; - static const uint8_t mask_d[16] = {128, 128, 64, 64, 32, 32, 16, 16, 8, 8, 4, 4, 2, 2, 1, 1}; - const int8x16_t mask = vreinterpretq_s8_u8(vld1q_u8(mask_d)); - int8x16_t r, a, t, X; - - t = simde__m128i_to_neon_i8(A); - a = vqtbl1q_s8(t, vld1q_u8(byte_interleave)); - t = simde__m128i_to_neon_i8(x); - X = vqtbl1q_s8(t, vld1q_u8(byte_interleave)); - r = vdupq_n_s8(0); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 0 ; i < 8 ; i++) { - t = vshrq_n_s8(a, 7); - t = vandq_s8(t, mask); - t = vreinterpretq_s8_u16(vdupq_n_u16(vaddvq_u16(vreinterpretq_u16_s8(t)))); - t = vandq_s8(t, vshrq_n_s8(X, 7)); - r = veorq_s8(r, t); - a = vshlq_n_s8(a, 1); - X = vshlq_n_s8(X, 1); - } - - r = vqtbl1q_s8(r, vld1q_u8(byte_deinterleave)); - return simde__m128i_from_neon_i8(r); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - static const uint8_t mask_d[16] = {128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1}; - const int8x16_t mask = vreinterpretq_s8_u8(vld1q_u8(mask_d)); - int8x16_t r, a, t, X; - int16x8_t t16; - int32x4_t t32; - - a = simde__m128i_to_neon_i8(A); - X = simde__m128i_to_neon_i8(x); - r = vdupq_n_s8(0); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 0 ; i < 8 ; i++) { - t = vshrq_n_s8(a, 7); - t = vandq_s8(t, mask); - t16 = vreinterpretq_s16_s8 (vorrq_s8 (t , vrev64q_s8 (t ))); - t32 = vreinterpretq_s32_s16(vorrq_s16(t16, vrev64q_s16(t16))); - t = vreinterpretq_s8_s32 (vorrq_s32(t32, vrev64q_s32(t32))); - t = vandq_s8(t, vshrq_n_s8(X, 7)); - r = veorq_s8(r, t); - a = vshlq_n_s8(a, 1); - X = vshlq_n_s8(X, 1); - } - - return simde__m128i_from_neon_i8(r); - #elif defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_interleave = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}; - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_deinterleave= {0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15}; - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) bit_select = {0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120}; - static const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) a, p, r; - SIMDE_POWER_ALTIVEC_VECTOR(signed char) X; - - X = simde__m128i_to_altivec_i8(x); - a = simde__m128i_to_altivec_u8(A); - X = vec_perm(X, X, byte_interleave); - r = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 0 ; i < 8 ; i++) { - #if defined(SIMDE_BUG_CLANG_50932) - p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), - vec_bperm(HEDLEY_STATIC_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a), bit_select)); - #else - p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm_u128(a, bit_select)); - #endif - p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), - vec_splat(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), p), 3)); - p &= X < zero; - r ^= p; - a += a; - X += X; - } - - r = vec_perm(r, r, 
byte_deinterleave); - return simde__m128i_from_altivec_u8(r); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) mask = {128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1}; - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_select = {7, 7, 7, 7, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15}; - static const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) p, r; - SIMDE_POWER_ALTIVEC_VECTOR(signed char) a, X; - - X = simde__m128i_to_altivec_i8(x); - a = simde__m128i_to_altivec_i8(A); - r = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 0 ; i < 8 ; i++) { - p = a < zero; - p &= mask; - p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), - vec_sum2(vec_sum4(p, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero)), - HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), zero))); - p = vec_perm(p, p, byte_select); - p &= X < zero; - r ^= p; - a += a; - X += X; - } - - return simde__m128i_from_altivec_u8(r); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_interleave = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}; - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_deinterleave= {0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15}; - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) bit_select = {64, 72, 80, 88, 96, 104, 112, 120, 0, 8, 16, 24, 32, 40, 48, 56}; - const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = vec_splats(HEDLEY_STATIC_CAST(signed char, 0)); - SIMDE_POWER_ALTIVEC_VECTOR(signed char) X; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) a, p, r; - - X = simde__m128i_to_altivec_i8(x); - a = simde__m128i_to_altivec_u8(A); - X = vec_perm(X, X, byte_interleave); - r = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 0 ; i < 8 ; i++) { - #if defined(SIMDE_BUG_CLANG_50932) - p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), - vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a), bit_select)); - #else - p = vec_bperm(a, bit_select); - #endif - p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), - vec_splat(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), p), 4)); - p = vec_and(p, vec_cmplt(X, zero)); - r = vec_xor(r, p); - a = vec_add(a, a); - X = vec_add(X, X); - } - - r = vec_perm(r, r, byte_deinterleave); - return simde__m128i_from_altivec_u8(r); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) mask = {128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1}; - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_select = {4, 4, 4, 4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 12, 12}; - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) sevens = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 7)); - const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = vec_splats(HEDLEY_STATIC_CAST(signed char, 0)); - SIMDE_POWER_ALTIVEC_VECTOR(signed char) X; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) a, p, r; - - X = simde__m128i_to_altivec_i8(x); - a = simde__m128i_to_altivec_u8(A); - r = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero); - - 
#if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 0 ; i < 8 ; i++) { - p = vec_sr(a, sevens); - p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), - vec_msum(p, - mask, - HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), zero))); - p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), - vec_sum2s(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), p), - HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), zero))); - p = vec_perm(p, p, byte_select); - p = vec_and(p, vec_cmplt(X, zero)); - r = vec_xor(r, p); - a = vec_add(a, a); - X = vec_add(X, X); - } - - return simde__m128i_from_altivec_u8(r); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t zero = wasm_i8x16_splat(0); - v128_t a, p, r, X; - - X = simde__m128i_to_wasm_v128(x); - a = simde__m128i_to_wasm_v128(A); - a = wasm_i8x16_shuffle(a, a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - X = wasm_i8x16_shuffle(X, X, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15); - r = zero; - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 0 ; i < 8 ; i++) { - p = wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, wasm_i8x16_bitmask(a))); - p = wasm_v128_and(p, wasm_i8x16_lt(X, zero)); - r = wasm_v128_xor(r, p); - a = wasm_i8x16_add(a, a); - X = wasm_i8x16_add(X, X); - } - - r = wasm_i8x16_shuffle(r, r, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); - return simde__m128i_from_wasm_v128(r); - #else - simde__m128i_private - r_, - x_ = simde__m128i_to_private(x), - A_ = simde__m128i_to_private(A); - - const uint64_t ones = UINT64_C(0x0101010101010101); - const uint64_t mask = UINT64_C(0x0102040810204080); - uint64_t q; - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - q = simde_endian_bswap64_le(A_.u64[i / 8]); - q &= HEDLEY_STATIC_CAST(uint64_t, x_.u8[i]) * ones; - q ^= q >> 4; - q ^= q >> 2; - q ^= q >> 1; - q &= ones; - q *= 255; - q &= mask; - q |= q >> 32; - q |= q >> 16; - q |= q >> 8; - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, q); - } - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_gf2p8matrix_multiply_epi64_epi8 (simde__m256i x, simde__m256i A) { - #if defined(SIMDE_X86_AVX2_NATIVE) - simde__m256i r, a, p; - const simde__m256i byte_select = simde_x_mm256_set_epu64x(UINT64_C(0x0303030303030303), UINT64_C(0x0202020202020202), - UINT64_C(0x0101010101010101), UINT64_C(0x0000000000000000)); - a = simde_mm256_shuffle_epi8(A, simde_mm256_broadcastsi128_si256(simde_x_mm_set_epu64x(UINT64_C(0x08090A0B0C0D0E0F), UINT64_C(0x0001020304050607)))); - r = simde_mm256_setzero_si256(); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 0 ; i < 8 ; i++) { - p = simde_mm256_set1_epi32(simde_mm256_movemask_epi8(a)); - p = simde_mm256_shuffle_epi8(p, byte_select); - p = simde_mm256_xor_si256(r, p); - r = simde_mm256_blendv_epi8(r, p, x); - a = simde_mm256_add_epi8(a, a); - x = simde_mm256_add_epi8(x, x); - } - - return r; - #else - simde__m256i_private - r_, - x_ = simde__m256i_to_private(x), - A_ = simde__m256i_to_private(A); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_gf2p8matrix_multiply_epi64_epi8(x_.m128i[i], A_.m128i[i]); - } - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i 
-simde_x_mm512_gf2p8matrix_multiply_epi64_epi8 (simde__m512i x, simde__m512i A) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - simde__m512i r, a, p; - const simde__m512i byte_select = simde_x_mm512_set_epu64(UINT64_C(0x0707070707070707), UINT64_C(0x0606060606060606), UINT64_C(0x0505050505050505), UINT64_C(0x0404040404040404), - UINT64_C(0x0303030303030303), UINT64_C(0x0202020202020202), UINT64_C(0x0101010101010101), UINT64_C(0X0000000000000000)); - a = simde_mm512_shuffle_epi8(A, simde_mm512_broadcast_i32x4(simde_x_mm_set_epu64x(UINT64_C(0x08090A0B0C0D0E0F), UINT64_C(0x0001020304050607)))); - r = simde_mm512_setzero_si512(); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 0 ; i < 8 ; i++) { - p = simde_mm512_set1_epi64(HEDLEY_STATIC_CAST(int64_t, simde_mm512_movepi8_mask(a))); - p = simde_mm512_maskz_shuffle_epi8(simde_mm512_movepi8_mask(x), p, byte_select); - r = simde_mm512_xor_si512(r, p); - a = simde_mm512_add_epi8(a, a); - x = simde_mm512_add_epi8(x, x); - } - - return r; - #else - simde__m512i_private - r_, - x_ = simde__m512i_to_private(x), - A_ = simde__m512i_to_private(A); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_gf2p8matrix_multiply_epi64_epi8(x_.m256i[i], A_.m256i[i]); - } - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_gf2p8inverse_epi8 (simde__m128i x) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - /* N.B. CM: this fallback may not be faster */ - simde__m128i r, u, t, test; - const simde__m128i sixteens = simde_mm_set1_epi8(16); - const simde__m128i masked_x = simde_mm_and_si128(x, simde_mm_set1_epi8(0x0F)); - - test = simde_mm_set1_epi8(INT8_MIN /* 0x80 */); - x = simde_mm_xor_si128(x, test); - r = simde_mm_shuffle_epi8(simde_x_gf2p8inverse_lut.m128i[0], masked_x); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 1 ; i < 16 ; i++) { - t = simde_mm_shuffle_epi8(simde_x_gf2p8inverse_lut.m128i[i], masked_x); - test = simde_mm_add_epi8(test, sixteens); - u = simde_mm_cmplt_epi8(x, test); - r = simde_mm_blendv_epi8(t, r, u); - } - - return r; - #else - simde__m128i_private - r_, - x_ = simde__m128i_to_private(x); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_x_gf2p8inverse_lut.u8[x_.u8[i]]; - } - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_gf2p8inverse_epi8 (simde__m256i x) { - #if defined(SIMDE_X86_AVX2_NATIVE) - /* N.B. 
CM: this fallback may not be faster */ - simde__m256i r, u, t, test; - const simde__m256i sixteens = simde_mm256_set1_epi8(16); - const simde__m256i masked_x = simde_mm256_and_si256(x, simde_mm256_set1_epi8(0x0F)); - - test = simde_mm256_set1_epi8(INT8_MIN /* 0x80 */); - x = simde_mm256_xor_si256(x, test); - r = simde_mm256_shuffle_epi8(simde_mm256_broadcastsi128_si256(simde_x_gf2p8inverse_lut.m128i[0]), masked_x); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 1 ; i < 16 ; i++) { - t = simde_mm256_shuffle_epi8(simde_mm256_broadcastsi128_si256(simde_x_gf2p8inverse_lut.m128i[i]), masked_x); - test = simde_mm256_add_epi8(test, sixteens); - u = simde_mm256_cmpgt_epi8(test, x); - r = simde_mm256_blendv_epi8(t, r, u); - } - - return r; - #else - simde__m256i_private - r_, - x_ = simde__m256i_to_private(x); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_gf2p8inverse_epi8(x_.m128i[i]); - } - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_gf2p8inverse_epi8 (simde__m512i x) { - /* N.B. CM: TODO: later add VBMI version using just two _mm512_permutex2var_epi8 and friends */ - /* But except for Cannon Lake all processors with VBMI also have GFNI */ - #if defined(SIMDE_X86_AVX512BW_NATIVE) - /* N.B. CM: this fallback may not be faster */ - simde__m512i r, test; - const simde__m512i sixteens = simde_mm512_set1_epi8(16); - const simde__m512i masked_x = simde_mm512_and_si512(x, simde_mm512_set1_epi8(0x0F)); - - r = simde_mm512_shuffle_epi8(simde_mm512_broadcast_i32x4(simde_x_gf2p8inverse_lut.m128i[0]), masked_x); - test = sixteens; - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 1 ; i < 16 ; i++) { - r = simde_mm512_mask_shuffle_epi8(r, simde_mm512_cmpge_epu8_mask(x, test), simde_mm512_broadcast_i32x4(simde_x_gf2p8inverse_lut.m128i[i]), masked_x); - test = simde_mm512_add_epi8(test, sixteens); - } - - return r; - #else - simde__m512i_private - r_, - x_ = simde__m512i_to_private(x); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_x_mm256_gf2p8inverse_epi8(x_.m256i[i]); - } - - return simde__m512i_from_private(r_); - #endif -} - -#define simde_x_mm_gf2p8matrix_multiply_inverse_epi64_epi8(x, A) simde_x_mm_gf2p8matrix_multiply_epi64_epi8(simde_x_mm_gf2p8inverse_epi8(x), A) -#define simde_x_mm256_gf2p8matrix_multiply_inverse_epi64_epi8(x, A) simde_x_mm256_gf2p8matrix_multiply_epi64_epi8(simde_x_mm256_gf2p8inverse_epi8(x), A) -#define simde_x_mm512_gf2p8matrix_multiply_inverse_epi64_epi8(x, A) simde_x_mm512_gf2p8matrix_multiply_epi64_epi8(simde_x_mm512_gf2p8inverse_epi8(x), A) - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_gf2p8affine_epi64_epi8 (simde__m128i x, simde__m128i A, int b) - SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { - return simde_mm_xor_si128(simde_x_mm_gf2p8matrix_multiply_epi64_epi8(x, A), simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); -} -#if defined(SIMDE_X86_GFNI_NATIVE) - #define simde_mm_gf2p8affine_epi64_epi8(x, A, b) _mm_gf2p8affine_epi64_epi8(x, A, b) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm_gf2p8affine_epi64_epi8 - #define _mm_gf2p8affine_epi64_epi8(x, A, b) simde_mm_gf2p8affine_epi64_epi8(x, A, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_gf2p8affine_epi64_epi8 (simde__m256i x, 
simde__m256i A, int b) - SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { - return simde_mm256_xor_si256(simde_x_mm256_gf2p8matrix_multiply_epi64_epi8(x, A), simde_mm256_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); -} -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_gf2p8affine_epi64_epi8(x, A, b) _mm256_gf2p8affine_epi64_epi8(x, A, b) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm256_gf2p8affine_epi64_epi8 - #define _mm256_gf2p8affine_epi64_epi8(x, A, b) simde_mm256_gf2p8affine_epi64_epi8(x, A, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_gf2p8affine_epi64_epi8 (simde__m512i x, simde__m512i A, int b) - SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { - return simde_mm512_xor_si512(simde_x_mm512_gf2p8matrix_multiply_epi64_epi8(x, A), simde_mm512_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); -} -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_gf2p8affine_epi64_epi8(x, A, b) _mm512_gf2p8affine_epi64_epi8(x, A, b) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm512_gf2p8affine_epi64_epi8 - #define _mm512_gf2p8affine_epi64_epi8(x, A, b) simde_mm512_gf2p8affine_epi64_epi8(x, A, b) -#endif - -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) _mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) -#else - #define simde_mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm_mask_mov_epi8(src, k, simde_mm_gf2p8affine_epi64_epi8(x, A, b)) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_gf2p8affine_epi64_epi8 - #define _mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) -#endif - -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) _mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) -#else - #define simde_mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm256_mask_mov_epi8(src, k, simde_mm256_gf2p8affine_epi64_epi8(x, A, b)) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_gf2p8affine_epi64_epi8 - #define _mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) -#endif - -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) _mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) -#else - #define simde_mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm512_mask_mov_epi8(src, k, simde_mm512_gf2p8affine_epi64_epi8(x, A, b)) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_gf2p8affine_epi64_epi8 - #define _mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) -#endif - -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) _mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) -#else - #define simde_mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm_maskz_mov_epi8(k, simde_mm_gf2p8affine_epi64_epi8(x, A, b)) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_gf2p8affine_epi64_epi8 - #define _mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) -#endif - -#if defined(SIMDE_X86_GFNI_NATIVE) && 
defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) _mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) -#else - #define simde_mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm256_maskz_mov_epi8(k, simde_mm256_gf2p8affine_epi64_epi8(x, A, b)) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_gf2p8affine_epi64_epi8 - #define _mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) -#endif - -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) _mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) -#else - #define simde_mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm512_maskz_mov_epi8(k, simde_mm512_gf2p8affine_epi64_epi8(x, A, b)) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_gf2p8affine_epi64_epi8 - #define _mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_gf2p8affineinv_epi64_epi8 (simde__m128i x, simde__m128i A, int b) - SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { - return simde_mm_xor_si128(simde_x_mm_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); -} -#if defined(SIMDE_X86_GFNI_NATIVE) - #define simde_mm_gf2p8affineinv_epi64_epi8(x, A, b) _mm_gf2p8affineinv_epi64_epi8(x, A, b) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm_gf2p8affineinv_epi64_epi8 - #define _mm_gf2p8affineinv_epi64_epi8(x, A, b) simde_mm_gf2p8affineinv_epi64_epi8(x, A, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_gf2p8affineinv_epi64_epi8 (simde__m256i x, simde__m256i A, int b) - SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { - return simde_mm256_xor_si256(simde_x_mm256_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm256_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); -} -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b) _mm256_gf2p8affineinv_epi64_epi8(x, A, b) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm256_gf2p8affineinv_epi64_epi8 - #define _mm256_gf2p8affineinv_epi64_epi8(x, A, b) simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_gf2p8affineinv_epi64_epi8 (simde__m512i x, simde__m512i A, int b) - SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { - return simde_mm512_xor_si512(simde_x_mm512_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm512_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); -} -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b) _mm512_gf2p8affineinv_epi64_epi8(x, A, b) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm512_gf2p8affineinv_epi64_epi8 - #define _mm512_gf2p8affineinv_epi64_epi8(x, A, b) simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b) -#endif - -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) _mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) -#else - #define simde_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm_mask_mov_epi8(src, k, simde_mm_gf2p8affineinv_epi64_epi8(x, A, b)) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_gf2p8affineinv_epi64_epi8 - #define 
_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) -#endif - -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) _mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) -#else - #define simde_mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm256_mask_mov_epi8(src, k, simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b)) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_gf2p8affineinv_epi64_epi8 - #define _mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) -#endif - -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) _mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) -#else - #define simde_mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm512_mask_mov_epi8(src, k, simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b)) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_gf2p8affineinv_epi64_epi8 - #define _mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) -#endif - -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) _mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) -#else - #define simde_mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm_maskz_mov_epi8(k, simde_mm_gf2p8affineinv_epi64_epi8(x, A, b)) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_gf2p8affineinv_epi64_epi8 - #define _mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) -#endif - -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) _mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) -#else - #define simde_mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm256_maskz_mov_epi8(k, simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b)) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_gf2p8affineinv_epi64_epi8 - #define _mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) -#endif - -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) _mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) -#else - #define simde_mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm512_maskz_mov_epi8(k, simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b)) -#endif -#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_gf2p8affineinv_epi64_epi8 - #define _mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_gf2p8mul_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_GFNI_NATIVE) && (defined(SIMDE_X86_AVX512VL_NATIVE) || !defined(SIMDE_X86_AVX512F_NATIVE)) - return _mm_gf2p8mul_epi8(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const poly8x16_t pa = vreinterpretq_p8_u8(simde__m128i_to_neon_u8(a)); - const poly8x16_t pb = vreinterpretq_p8_u8(simde__m128i_to_neon_u8(b)); - const uint8x16_t lo = vreinterpretq_u8_p16(vmull_p8(vget_low_p8(pa), vget_low_p8(pb))); - #if 
defined (SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x16_t hi = vreinterpretq_u8_p16(vmull_high_p8(pa, pb)); - #else - uint8x16_t hi = vreinterpretq_u8_p16(vmull_p8(vget_high_p8(pa), vget_high_p8(pb))); - #endif - uint8x16x2_t hilo = vuzpq_u8(lo, hi); - uint8x16_t r = hilo.val[0]; - hi = hilo.val[1]; - const uint8x16_t idxHi = vshrq_n_u8(hi, 4); - const uint8x16_t idxLo = vandq_u8(hi, vdupq_n_u8(0xF)); - - #if defined (SIMDE_ARM_NEON_A64V8_NATIVE) - static const uint8_t reduceLutHiData[] = { - 0x00, 0xab, 0x4d, 0xe6, 0x9a, 0x31, 0xd7, 0x7c, - 0x2f, 0x84, 0x62, 0xc9, 0xb5, 0x1e, 0xf8, 0x53 - }; - static const uint8_t reduceLutLoData[] = { - 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, - 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 - }; - const uint8x16_t reduceLutHi = vld1q_u8(reduceLutHiData); - const uint8x16_t reduceLutLo = vld1q_u8(reduceLutLoData); - r = veorq_u8(r, vqtbl1q_u8(reduceLutHi, idxHi)); - r = veorq_u8(r, vqtbl1q_u8(reduceLutLo, idxLo)); - #else - static const uint8_t reduceLutHiData[] = { - 0x00, 0x2f, - 0xab, 0x84, - 0x4d, 0x62, - 0xe6, 0xc9, - 0x9a, 0xb5, - 0x31, 0x1e, - 0xd7, 0xf8, - 0x7c, 0x53 - }; - static const uint8_t reduceLutLoData[] = { - 0x00, 0xd8, - 0x1b, 0xc3, - 0x36, 0xee, - 0x2d, 0xf5, - 0x6c, 0xb4, - 0x77, 0xaf, - 0x5a, 0x82, - 0x41, 0x99 - }; - const uint8x8x2_t reduceLutHi = vld2_u8(reduceLutHiData); - const uint8x8x2_t reduceLutLo = vld2_u8(reduceLutLoData); - r = veorq_u8(r, vcombine_u8(vtbl2_u8(reduceLutHi, vget_low_u8(idxHi)), vtbl2_u8(reduceLutHi, vget_high_u8(idxHi)))); - r = veorq_u8(r, vcombine_u8(vtbl2_u8(reduceLutLo, vget_low_u8(idxLo)), vtbl2_u8(reduceLutLo, vget_high_u8(idxLo)))); - #endif - return simde__m128i_from_neon_u8(r); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) x, y, lo, hi; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) even, odd, mask0x00FF; - x = simde__m128i_to_altivec_u8(a); - y = simde__m128i_to_altivec_u8(b); - mask0x00FF = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x00FF)); - lo = y & HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), mask0x00FF); - hi = y ^ lo; - even = vec_gfmsum(x, lo); - odd = vec_gfmsum(x, hi); - lo = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_sel(vec_rli(odd, 8), even, mask0x00FF)); - hi = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_sel(odd, vec_rli(even, 8), mask0x00FF)); - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) reduceLutHi = {0x00, 0xab, 0x4d, 0xe6, 0x9a, 0x31, 0xd7, 0x7c, 0x2f, 0x84, 0x62, 0xc9, 0xb5, 0x1e, 0xf8, 0x53}; - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) reduceLutLo = {0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99}; - lo = lo ^ vec_perm(reduceLutHi, reduceLutHi, vec_rli(hi, 4)); - lo = lo ^ vec_perm(reduceLutLo, reduceLutLo, hi); - return simde__m128i_from_altivec_u8(lo); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) x, y, r, t, m; - x = simde__m128i_to_altivec_u8(a); - y = simde__m128i_to_altivec_u8(b); - - const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = vec_splat_s8(0); - - m = vec_splat_u8(0x01); - - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) fgp = vec_splats(HEDLEY_STATIC_CAST(unsigned char, SIMDE_X86_GFNI_FGP)); - t = vec_and(y, m); - t = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpeq(t, m)); - r = vec_and(x, t); - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (int i = 0 ; i < 7 ; i++) { - t = 
[... remaining deleted lines of the vendored src/simde/x86/gfni.h elided: the rest of the
    simde_mm_gf2p8mul_epi8 fallback paths (WASM SIMD128, AVX-512BW, AVX2, SSE2, and the
    portable scalar loop), the simde_mm256_ / simde_mm512_ variants, their _mask/_maskz
    wrappers, and the closing #endif /* !defined(SIMDE_X86_GFNI_H) */ ...]
diff --git a/src/simde/x86/mmx.h b/src/simde/x86/mmx.h
deleted file mode 100644
index 71d7575e4..000000000
--- a/src/simde/x86/mmx.h
+++
/dev/null @@ -1,10916 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/mmx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_MMX_H) -#define SIMDE_X86_MMX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-common.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_COMMON_H) -#define SIMDE_COMMON_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/hedley.h :: */ -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . 
- * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) -#if defined(HEDLEY_VERSION) -# undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 16 - -#if defined(HEDLEY_STRINGIFY_EX) -# undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -# undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -# undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(HEDLEY_CONCAT) -# undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) - -#if defined(HEDLEY_CONCAT3_EX) -# undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(HEDLEY_CONCAT3) -# undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(HEDLEY_VERSION_ENCODE) -# undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -# undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -# undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -# undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -# undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -# undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -# undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -# undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(HEDLEY_INTEL_VERSION) -# undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -# undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -# undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) -# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -# undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -# undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -# undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -# undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -# undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_ARM_VERSION) -# undef HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(HEDLEY_ARM_VERSION_CHECK) -# undef HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(HEDLEY_ARM_VERSION) -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IBM_VERSION) -# undef HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(HEDLEY_IBM_VERSION_CHECK) -# undef HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(HEDLEY_IBM_VERSION) -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_VERSION) -# undef HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -# if (__TI_COMPILER_VERSION__ >= 16000000) -# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -# endif -#endif - -#if defined(HEDLEY_TI_VERSION_CHECK) -# undef HEDLEY_TI_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_VERSION) -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION) -# undef HEDLEY_TI_CL2000_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) -# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) -# undef HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL2000_VERSION) -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION) -# undef HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) -# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
-# undef HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL430_VERSION) -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION) -# undef HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) -# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) -# undef HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_ARMCL_VERSION) -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION) -# undef HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) -# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) -# undef HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL6X_VERSION) -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION) -# undef HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) -# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) -# undef HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL7X_VERSION) -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION) -# undef HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) -# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) -# undef HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CLPRU_VERSION) -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_CRAY_VERSION) -# undef HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) -# if defined(_RELEASE_PATCHLEVEL) -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) -# else -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) -# endif -#endif - -#if defined(HEDLEY_CRAY_VERSION_CHECK) -# undef HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(HEDLEY_CRAY_VERSION) -# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IAR_VERSION) -# undef HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) -# if __VER__ > 1000 -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) -# else -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) -# endif -#endif - -#if defined(HEDLEY_IAR_VERSION_CHECK) -# undef HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(HEDLEY_IAR_VERSION) -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TINYC_VERSION) -# undef HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) -# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) -#endif - -#if defined(HEDLEY_TINYC_VERSION_CHECK) -# undef HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_DMC_VERSION) -# undef HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) -# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if defined(HEDLEY_DMC_VERSION_CHECK) -# undef HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(HEDLEY_DMC_VERSION) -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION) -# undef HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) -# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION_CHECK) -# undef HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(HEDLEY_COMPCERT_VERSION) -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PELLES_VERSION) -# undef HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) -# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if defined(HEDLEY_PELLES_VERSION_CHECK) -# undef HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(HEDLEY_PELLES_VERSION) -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION) -# undef HEDLEY_MCST_LCC_VERSION -#endif -#if defined(__LCC__) && defined(__LCC_MINOR__) -# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) -# undef HEDLEY_MCST_LCC_VERSION_CHECK -#endif -#if defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_GCC_VERSION) -# undef HEDLEY_GCC_VERSION 
-#endif -#if \ - defined(HEDLEY_GNUC_VERSION) && \ - !defined(__clang__) && \ - !defined(HEDLEY_INTEL_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_ARM_VERSION) && \ - !defined(HEDLEY_CRAY_VERSION) && \ - !defined(HEDLEY_TI_VERSION) && \ - !defined(HEDLEY_TI_ARMCL_VERSION) && \ - !defined(HEDLEY_TI_CL430_VERSION) && \ - !defined(HEDLEY_TI_CL2000_VERSION) && \ - !defined(HEDLEY_TI_CL6X_VERSION) && \ - !defined(HEDLEY_TI_CL7X_VERSION) && \ - !defined(HEDLEY_TI_CLPRU_VERSION) && \ - !defined(__COMPCERT__) && \ - !defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION -#endif - -#if defined(HEDLEY_GCC_VERSION_CHECK) -# undef HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(HEDLEY_GCC_VERSION) -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_HAS_ATTRIBUTE) -# undef HEDLEY_HAS_ATTRIBUTE -#endif -#if \ - defined(__has_attribute) && \ - ( \ - (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ - ) -# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if \ - defined(__has_cpp_attribute) && \ - defined(__cplusplus) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) -# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#elif \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_BUILTIN) -# undef HEDLEY_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) -#else -# define HEDLEY_HAS_BUILTIN(builtin) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_BUILTIN) -# undef HEDLEY_GNUC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_BUILTIN) -# undef HEDLEY_GCC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_FEATURE) -# undef HEDLEY_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) -#else -# define HEDLEY_HAS_FEATURE(feature) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_FEATURE) -# undef HEDLEY_GNUC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_FEATURE) -# undef HEDLEY_GCC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_EXTENSION) -# undef HEDLEY_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) -#else -# define HEDLEY_HAS_EXTENSION(extension) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_EXTENSION) -# undef HEDLEY_GNUC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_EXTENSION) -# undef HEDLEY_GCC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) -#else -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_WARNING) -# undef HEDLEY_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) -#else -# define HEDLEY_HAS_WARNING(warning) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_WARNING) -# undef HEDLEY_GNUC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_WARNING) -# undef HEDLEY_GCC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - defined(__clang__) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ - HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ - (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) -# define HEDLEY_PRAGMA(value) _Pragma(#value) -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_PRAGMA(value) __pragma(value) -#else -# define HEDLEY_PRAGMA(value) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_PUSH) -# undef HEDLEY_DIAGNOSTIC_PUSH -#endif -#if defined(HEDLEY_DIAGNOSTIC_POP) -# undef HEDLEY_DIAGNOSTIC_POP -#endif -#if defined(__clang__) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) -# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) -#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#else -# 
define HEDLEY_DIAGNOSTIC_PUSH -# define HEDLEY_DIAGNOSTIC_POP -#endif - -/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wc++98-compat") -# if HEDLEY_HAS_WARNING("-Wc++17-extensions") -# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# endif -#endif -#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x -#endif - -#if defined(HEDLEY_CONST_CAST) -# undef HEDLEY_CONST_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) -#elif \ - HEDLEY_HAS_WARNING("-Wcast-qual") || \ - HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_REINTERPRET_CAST) -# undef HEDLEY_REINTERPRET_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) -#else -# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_STATIC_CAST) -# undef HEDLEY_STATIC_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) -#else -# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_CPP_CAST) -# undef HEDLEY_CPP_CAST -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wold-style-cast") -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ - ((T) (expr)) \ - HEDLEY_DIAGNOSTIC_POP -# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("diag_suppress=Pe137") \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) -# endif -#else -# define HEDLEY_CPP_CAST(T, expr) (expr) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) -# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif -#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-attributes") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") -#elif \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif -#if HEDLEY_HAS_WARNING("-Wcast-qual") -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif -#if HEDLEY_HAS_WARNING("-Wunused-function") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif - -#if defined(HEDLEY_DEPRECATED) -# undef HEDLEY_DEPRECATED -#endif -#if defined(HEDLEY_DEPRECATED_FOR) -# undef HEDLEY_DEPRECATED_FOR -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) -# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) -#elif \ - (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) -#elif defined(__cplusplus) && (__cplusplus >= 201402L) -# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) -# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(deprecated) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") -# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") -#else -# define HEDLEY_DEPRECATED(since) -# define HEDLEY_DEPRECATED_FOR(since, replacement) -#endif - -#if defined(HEDLEY_UNAVAILABLE) -# undef HEDLEY_UNAVAILABLE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warning) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) -#else -# define HEDLEY_UNAVAILABLE(available_since) -#endif - -#if defined(HEDLEY_WARN_UNUSED_RESULT) -# undef HEDLEY_WARN_UNUSED_RESULT -#endif -#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) -# undef HEDLEY_WARN_UNUSED_RESULT_MSG -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
[Deleted vendored header, condensed: the remainder of simde/hedley.h is removed wholesale.
It defines the HEDLEY_* compiler-compatibility macros — function attributes
(HEDLEY_WARN_UNUSED_RESULT, HEDLEY_SENTINEL, HEDLEY_NO_RETURN, HEDLEY_NO_ESCAPE,
HEDLEY_NON_NULL, HEDLEY_PRINTF_FORMAT, HEDLEY_MALLOC, HEDLEY_PURE, HEDLEY_CONST,
HEDLEY_RETURNS_NON_NULL, HEDLEY_NO_THROW, HEDLEY_FALL_THROUGH, symbol visibility via
HEDLEY_PRIVATE/PUBLIC/IMPORT), optimizer hints (HEDLEY_UNREACHABLE, HEDLEY_ASSUME,
HEDLEY_PREDICT/LIKELY/UNLIKELY/UNPREDICTABLE), keyword and language shims
(HEDLEY_CONSTEXPR, HEDLEY_RESTRICT, HEDLEY_INLINE/ALWAYS_INLINE/NEVER_INLINE,
HEDLEY_STATIC_ASSERT, HEDLEY_NULL, HEDLEY_BEGIN/END_C_DECLS, HEDLEY_ARRAY_PARAM),
constant-expression detection (HEDLEY_IS_CONSTANT, HEDLEY_REQUIRE_CONSTEXPR),
message/warning pragmas (HEDLEY_MESSAGE, HEDLEY_WARNING, HEDLEY_REQUIRE, HEDLEY_FLAGS,
HEDLEY_EMPTY_BASES), and the deprecated HEDLEY_CLANG_HAS_* aliases. Each macro is
selected per compiler through HEDLEY_*_VERSION_CHECK and __has_attribute/__has_builtin
feature tests, with empty fallbacks. The file ends with the SIMDE_VERSION defines
(0.8.0) and the start of the next vendored header.]
[Deleted vendored header, condensed: simde/simde-detect-clang.h is removed. It infers the
upstream clang version (SIMDE_DETECT_CLANG_VERSION, resolvable back to clang 3.6) from
__has_warning/__has_attribute feature probes rather than from __clang_major__ and
__clang_minor__, because vendor forks such as Apple Clang redefine those macros to their
own version numbers (e.g. "Apple Clang 4.0" is really clang 3.1).
SIMDE_DETECT_CLANG_VERSION_CHECK and _NOT then compare against a value encoded as
major*10000 + minor*1000 + revision, and evaluate to 0 on non-clang compilers.]
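A minimal sketch of that detection idea, using a hypothetical MYPKG_ prefix; the two
warning-to-version cut-offs shown follow the deleted header's own table, and the guarded
check macro mirrors its fallback-to-0 pattern:

    /* Infer an upstream clang version from feature probes instead of
     * __clang_major__, which vendor forks (e.g. Apple Clang) redefine. */
    #if defined(__clang__) && !defined(MYPKG_CLANG_VERSION)
    #  if __has_warning("-Wbitwise-instead-of-logical")             /* warning added in clang 14 */
    #    define MYPKG_CLANG_VERSION 140000
    #  elif __has_warning("-Wimplicit-const-int-float-conversion")  /* warning added in clang 11 */
    #    define MYPKG_CLANG_VERSION 110000
    #  else
    #    define MYPKG_CLANG_VERSION 1                               /* clang, version unknown */
    #  endif
    #endif

    /* Guarded comparison: collapses to 0 when not compiling with clang,
     * so callers can use it directly in #if without -Wundef noise. */
    #if defined(MYPKG_CLANG_VERSION)
    #  define MYPKG_CLANG_VERSION_CHECK(maj, min, rev) \
         (MYPKG_CLANG_VERSION >= ((maj) * 10000 + (min) * 1000 + (rev)))
    #else
    #  define MYPKG_CLANG_VERSION_CHECK(maj, min, rev) (0)
    #endif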
[Deleted vendored header, condensed: simde/simde-arch.h is removed. It maps each
compiler's architecture predefines onto uniform SIMDE_ARCH_* macros (Alpha, AVR,
AMD64/x86, ARM/AArch64 with NEON/SVE/FP16/BF16, Blackfin, CRIS, Convex, Epiphany, FR-V,
H8/300, Elbrus E2K, HP/PA, Itanium, m68k, MicroBlaze, MIPS with Loongson MMI/MSA,
MN10300, POWER/AltiVec, RISC-V 64, SPARC, SuperH, IBM z/Architecture, TMS320 DSP,
WebAssembly SIMD128/relaxed SIMD, Xtensa, LoongArch LSX/LASX) together with the x86 SIMD
ISA extension flags (MMX, SSE through SSE4.2, XOP, AVX/AVX2/FMA, the AVX-512 subsets,
GFNI, PCLMUL, VPCLMULQDQ, F16C, AES). For each architecture it also defines a
SIMDE_ARCH_*_CHECK(version) helper so callers can test a minimum version without first
checking whether the base macro is defined, avoiding -Wundef warnings.]
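A similarly minimal sketch of that guarded architecture-check pattern (again with a
hypothetical MYPKG_ prefix), showing why the check macro is preferred over testing the
raw value:

    /* Define the base macro only where the architecture is actually known. */
    #if defined(__i686__)
    #  define MYPKG_ARCH_X86 6
    #elif defined(__i386__)
    #  define MYPKG_ARCH_X86 3
    #endif

    /* The check macro collapses to 0 elsewhere, so callers can write
     * `#if MYPKG_ARCH_X86_CHECK(5)` without first testing defined(). */
    #if defined(MYPKG_ARCH_X86)
    #  define MYPKG_ARCH_X86_CHECK(version) ((version) <= MYPKG_ARCH_X86)
    #else
    #  define MYPKG_ARCH_X86_CHECK(version) (0)
    #endif

    #if MYPKG_ARCH_X86_CHECK(5)   /* i586 or newer */
    /* ... architecture-specific code path ... */
    #endif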
[Deleted vendored headers, condensed: simde/simde-features.h and the opening portion of
simde/simde-diagnostic.h are removed. simde-features.h layers on top of simde-arch.h so
that individual APIs can be forcibly enabled or disabled regardless of what the compiler
reports. simde-diagnostic.h defines SIMDE_DIAGNOSTIC_DISABLE_* macros that expand to the
appropriate per-compiler _Pragma/__pragma directives for suppressing specific warnings —
uninitialized reads (for _mm_undefined_ps-style helpers), GCC -Wpsabi ABI notes, the MSVC
and Intel "missing _mm_empty() after MMX" diagnostics, Intel's Cilk+ "#pragma simd"
deprecation note, MSVC's non-constant aggregate initializer warning,
-Wconditional-uninitialized, -Wfloat-equal (SIMDe's comparison intrinsics genuinely need
exact equality), -Wextra-semi, -Wvariadic-macros, reserved-identifier warnings for
simde__m128i-style names, -Wpacked, -Wdouble-promotion, and -Wvla — so that SIMDe
compiles cleanly with -Weverything/-Wextra//W4 treated as errors; the warning stack is
pushed and restored afterwards so that including code keeps its own diagnostics.]
*/ -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpass-failed") - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpadded") - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ -#endif - -#if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ -#endif - -#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ -#endif - -/* clang will emit this warning when we use C99 extensions whan not in - * C99 mode, even though it does support this. In such cases we check - * the compiler and version first, so we know it's not a problem. */ -#if HEDLEY_HAS_WARNING("-Wc99-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -#endif - -/* Similar problm as above; we rely on some basic C99 support, but clang - * has started warning obut this even in C17 mode with -Weverything. */ -#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ -#endif - -/* https://github.com/simd-everywhere/simde/issues/277 */ -#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ -#endif - -/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS - * to silence, but you have to do that before including anything and - * that would require reordering includes. */ -#if defined(_MSC_VER) - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ -#endif - -/* Some compilers, such as clang, may use `long long` for 64-bit - * integers, but `long long` triggers a diagnostic with - * -Wc++98-compat-pedantic which says 'long long' is incompatible with - * C++98. 
*/ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ - _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ -#endif - -/* Some problem as above */ -#if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ -#endif - -/* emscripten emits this whenever stdin/stdout/stderr is used in a - * macro. */ -#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ -#endif - -/* Clang uses C11 generic selections to implement some AltiVec - * functions, which triggers this diagnostic when not compiling - * in C11 mode */ -#if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ -#endif - -/* Clang sometimes triggers this warning in macros in the AltiVec and - * NEON headers, or due to missing functions. */ -#if HEDLEY_HAS_WARNING("-Wvector-conversion") - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") - /* For NEON, the situation with -Wvector-conversion in clang < 10 is - * bad enough that we just disable the warning altogether. On x86, - * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ - #if \ - (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ - SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ -#endif - -/* Prior to 5.0, clang didn't support disabling diagnostics in - * statement exprs. As a result, some macros we use don't - * properly silence warnings. 
*/ -#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ -#endif - -/* SLEEF triggers this a *lot* in their headers */ -#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#endif - -/* GCC emits this under some circumstances when using __int128 */ -#if HEDLEY_GCC_VERSION_CHECK(4,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -#endif - -/* MSVC doesn't like (__assume(0), code) and will warn about code being - * unreachable, but we want it there because not all compilers - * understand the unreachable macro and will complain if it is missing. - * I'm planning on adding a new macro to Hedley to handle this a bit - * more elegantly, but until then... */ -#if defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) -#elif defined(__clang__) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ -#endif - -/* This is a false positive from GCC in a few places. */ -#if HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif - -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#else - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ -#endif - -/* Some native functions on E2K with instruction set < v6 are declared - * as deprecated due to inefficiency. Still they are more efficient - * than SIMDe implementation. So we're using them, and switching off - * these deprecation warnings. 
*/ -#if defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") -#else -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS -#endif - -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ - HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ - SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ - SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ - SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ - SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ - SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ - SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ - -#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ -/* :: End simde/simde-diagnostic.h :: */ - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SVML) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) - #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) - #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BITALG) - #define SIMDE_X86_AVX512BITALG_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI) - #define SIMDE_X86_AVX512VBMI_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI2) - #define SIMDE_X86_AVX512VBMI2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VNNI) - #define SIMDE_X86_AVX512VNNI_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) - #define SIMDE_X86_AVX5124VNNIW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512CD) - #define SIMDE_X86_AVX512CD_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512DQ) - #define SIMDE_X86_AVX512DQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VL) - #define SIMDE_X86_AVX512VL_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BW) - #define SIMDE_X86_AVX512BW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && !defined(SIMDE_X86_AVX512FP16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512FP16) - #define SIMDE_X86_AVX512FP16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BF16) - #define SIMDE_X86_AVX512BF16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512F) - #define SIMDE_X86_AVX512F_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_NATIVE -#endif - -#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_FMA) - #define SIMDE_X86_FMA_NATIVE - #endif -#endif -#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX2) - #define SIMDE_X86_AVX2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX) - #define SIMDE_X86_AVX_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_XOP) - #define SIMDE_X86_XOP_NATIVE - #endif -#endif -#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_2) - #define SIMDE_X86_SSE4_2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_1) - #define SIMDE_X86_SSE4_1_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSSE3) - #define SIMDE_X86_SSSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE3) - #define SIMDE_X86_SSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_AES_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AES) - #define SIMDE_X86_AES_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE2) - #define SIMDE_X86_SSE2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE) - #define SIMDE_X86_SSE_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_MMX) - #define SIMDE_X86_MMX_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_GFNI) - #define SIMDE_X86_GFNI_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_PCLMUL) - #define SIMDE_X86_PCLMUL_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) - #define SIMDE_X86_VPCLMULQDQ_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_F16C) - #define SIMDE_X86_F16C_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86) && 
(defined(__INTEL_COMPILER) || (HEDLEY_MSVC_VERSION_CHECK(14, 20, 0) && !defined(__clang__))) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(push) - #pragma warning(disable:4799) -#endif - -#if \ - defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) || defined(SIMDE_X86_SVML_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_1_NATIVE) - #include -#elif defined(SIMDE_X86_SSSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE_NATIVE) - #include -#elif defined(SIMDE_X86_MMX_NATIVE) - #include -#endif - -#if defined(SIMDE_X86_XOP_NATIVE) - #if defined(_MSC_VER) - #include - #else - #include - #endif -#endif - -#if defined(SIMDE_X86_AES_NATIVE) - #include -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(pop) -#endif - -#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) - #define SIMDE_ARM_NEON_A64V8_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) - #define SIMDE_ARM_NEON_A32V8_NATIVE - #endif -#endif -#if defined(__ARM_ACLE) - #include -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) - #define SIMDE_ARM_NEON_A32V7_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include - #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - #include - #endif -#endif - -#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_SVE) - #define SIMDE_ARM_SVE_NATIVE - #include - #endif -#endif - -#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_SIMD128) - #define SIMDE_WASM_SIMD128_NATIVE - #endif -#endif - -#if !defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) && !defined(SIMDE_WASM_RELAXED_SIMD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_RELAXED_SIMD) - #define SIMDE_WASM_RELAXED_SIMD_NATIVE - #endif -#endif -#if defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) - #include -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) - #define SIMDE_POWER_ALTIVEC_P9_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) - #define 
SIMDE_POWER_ALTIVEC_P7_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) - #define SIMDE_POWER_ALTIVEC_P7_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_15_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_14_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_13_NATIVE - #endif -#endif - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - /* AltiVec conflicts with lots of stuff. The bool keyword conflicts - * with the bool keyword in C++ and the bool macro in C99+ (defined - * in stdbool.h). The vector keyword conflicts with std::vector in - * C++ if you are `using std;`. - * - * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` - * instead, but altivec.h will unconditionally define - * `vector`/`bool`/`pixel` so we need to work around that. - * - * Unfortunately this means that if your code uses AltiVec directly - * it may break. If this is the case you'll want to define - * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even - * better, port your code to use the double-underscore versions. */ - #if defined(bool) - #undef bool - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #include - - #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #if defined(vector) - #undef vector - #endif - #if defined(pixel) - #undef pixel - #endif - #if defined(bool) - #undef bool - #endif - #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #include - #endif - - /* Use these intsead of vector/pixel/bool in SIMDe. 
*/ - #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T - #define SIMDE_POWER_ALTIVEC_PIXEL __pixel - #define SIMDE_POWER_ALTIVEC_BOOL __bool - - /* Re-define bool if we're using stdbool.h */ - #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #define bool _Bool - #endif -#endif - -#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) - #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_MSA) - #define SIMDE_MIPS_MSA_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_MSA_NATIVE) - #include -#endif - -/* This is used to determine whether or not to fall back on a vector - * function in an earlier ISA extensions, as well as whether - * we expected any attempts at vectorization to be fruitful or if we - * expect to always be running serial code. - * - * Note that, for some architectures (okay, *one* architecture) there - * can be a split where some types are supported for one vector length - * but others only for a shorter length. Therefore, it is possible to - * provide separate values for float/int/double types. */ - -#if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (512) - #elif defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (256) - #elif defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || \ - defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ - defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (128) - #elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) - #endif - - #if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE - #else - #define SIMDE_NATURAL_VECTOR_SIZE (0) - #endif - #endif - - #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif -#endif - -#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) 
((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) - -/* Native aliases */ -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_FMA_NATIVE) - #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VL_NATIVE) - #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) - #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) - #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BW_NATIVE) - #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) - #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) - #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BF16_NATIVE) - #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) - #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) - #define SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512DQ_NATIVE) - #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512CD_NATIVE) - #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512FP16_NATIVE) - #define SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_GFNI_NATIVE) - #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_PCLMUL_NATIVE) - #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) - #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_F16C_NATIVE) - #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AES_NATIVE) - #define 
SIMDE_X86_AES_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SVML_NATIVE) - #define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_SVE_NATIVE) - #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_WASM_SIMD128_NATIVE) - #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES - #endif -#endif - -/* Are floating point values stored using IEEE 754? Knowing - * this at during preprocessing is a bit tricky, mostly because what - * we're curious about is how values are stored and not whether the - * implementation is fully conformant in terms of rounding, NaN - * handling, etc. - * - * For example, if you use -ffast-math or -Ofast on - * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 - * support is not advertised (by defining __STDC_IEC_559__). - * - * However, what we care about is whether it is safe to assume that - * floating point values are stored in IEEE 754 format, in which case - * we can provide faster implementations of some functions. - * - * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- - * so we just assume IEEE 754 for now. There is a test which verifies - * this, if that test fails sowewhere please let us know and we'll add - * an exception for that platform. Meanwhile, you can define - * SIMDE_NO_IEEE754_STORAGE. */ -#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) - #define SIMDE_IEEE754_STORAGE -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_FP16) - #define SIMDE_ARM_NEON_FP16 -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_BF16) - #define SIMDE_ARM_NEON_BF16 -#endif - -#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LASX) - #define SIMDE_LOONGARCH_LASX_NATIVE - #endif -#endif - -#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LSX) - #define SIMDE_LOONGARCH_LSX_NATIVE - #endif -#endif - -#if defined(SIMDE_LOONGARCH_LASX_NATIVE) - #include -#endif -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - #include -#endif - -#endif /* !defined(SIMDE_FEATURES_H) */ -/* :: End simde/simde-features.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-math.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -/* Attempt to find math functions. Functions may be in , - * , compiler built-ins/intrinsics, or platform/architecture - * specific headers. In some cases, especially those not built in to - * libm, we may need to define our own implementations. */ - -#if !defined(SIMDE_MATH_H) -#define SIMDE_MATH_H 1 - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#include -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -/* SLEEF support - * https://sleef.org/ - * - * If you include prior to including SIMDe, SIMDe will use - * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to - * including SIMDe to force the issue. - * - * Note that SLEEF does requires linking to libsleef. - * - * By default, SIMDe will use the 1 ULP functions, but if you use - * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is - * only the case for the simde_math_* functions; for code in other - * SIMDe headers which calls SLEEF directly we may use functions with - * greater error if the API we're implementing is less precise (for - * example, SVML guarantees 4 ULP, so we will generally use the 3.5 - * ULP functions from SLEEF). */ -#if !defined(SIMDE_MATH_SLEEF_DISABLE) - #if defined(__SLEEF_H__) - #define SIMDE_MATH_SLEEF_ENABLE - #endif -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ - #include - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) - #if defined(SLEEF_VERSION_MAJOR) - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #endif -#else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(__has_builtin) - #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) - #define SIMDE_MATH_BUILTIN_LIBM(func) (1) -#else - #define SIMDE_MATH_BUILTIN_LIBM(func) (0) -#endif - -#if defined(HUGE_VAL) - /* Looks like or has already been included. */ - - /* The math.h from libc++ (yes, the C header from the C++ standard - * library) will define an isnan function, but not an isnan macro - * like the C standard requires. So we detect the header guards - * macro libc++ uses. 
*/ - #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) - #define SIMDE_MATH_HAVE_MATH_H - #elif defined(__cplusplus) - #define SIMDE_MATH_HAVE_CMATH - #endif -#elif defined(__has_include) - #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() - #define SIMDE_MATH_HAVE_CMATH - #include - #elif __has_include() - #define SIMDE_MATH_HAVE_MATH_H - #include - #elif !defined(SIMDE_MATH_NO_LIBM) - #define SIMDE_MATH_NO_LIBM - #endif -#elif !defined(SIMDE_MATH_NO_LIBM) - #if defined(__cplusplus) && (__cplusplus >= 201103L) - #define SIMDE_MATH_HAVE_CMATH - HEDLEY_DIAGNOSTIC_PUSH - #if defined(HEDLEY_MSVC_VERSION) - /* VS 14 emits this diagnostic about noexcept being used on a - * function, which we can't do anything about. */ - #pragma warning(disable:4996) - #endif - #include - HEDLEY_DIAGNOSTIC_POP - #else - #define SIMDE_MATH_HAVE_MATH_H - #include - #endif -#endif - -#if !defined(SIMDE_MATH_INFINITY) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_INFINITY (__builtin_inf()) - #elif defined(INFINITY) - #define SIMDE_MATH_INFINITY INFINITY - #endif -#endif - -#if !defined(SIMDE_INFINITYF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inff) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_INFINITYF (__builtin_inff()) - #elif defined(INFINITYF) - #define SIMDE_MATH_INFINITYF INFINITYF - #elif defined(SIMDE_MATH_INFINITY) - #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) - #endif -#endif - -#if !defined(SIMDE_MATH_NAN) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nan) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_NAN (__builtin_nan("")) - #elif defined(NAN) - #define SIMDE_MATH_NAN NAN - #endif -#endif - -#if !defined(SIMDE_NANF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_NANF (__builtin_nanf("")) - #elif defined(NANF) - #define SIMDE_MATH_NANF NANF - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) - #endif -#endif - -#if !defined(SIMDE_MATH_PI) - #if defined(M_PI) - #define SIMDE_MATH_PI M_PI - #else - #define SIMDE_MATH_PI 3.14159265358979323846 - #endif -#endif - -#if !defined(SIMDE_MATH_PIF) - #if defined(M_PI) - #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) - #else - #define SIMDE_MATH_PIF 3.14159265358979323846f - #endif -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180) - #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180F) - #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f -#endif - -#if !defined(SIMDE_MATH_180_OVER_PI) - #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 -#endif - -#if !defined(SIMDE_MATH_180_OVER_PIF) - #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f -#endif - -#if 
!defined(SIMDE_MATH_FLT_MIN) - #if defined(__FLT_MIN__) - #define SIMDE_MATH_FLT_MIN __FLT_MIN__ - #else - #if !defined(FLT_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MIN FLT_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_FLT_MAX) - #if defined(__FLT_MAX__) - #define SIMDE_MATH_FLT_MAX __FLT_MAX__ - #else - #if !defined(FLT_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MAX FLT_MAX - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MIN) - #if defined(__DBL_MIN__) - #define SIMDE_MATH_DBL_MIN __DBL_MIN__ - #else - #if !defined(DBL_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MIN DBL_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MAX) - #if defined(__DBL_MAX__) - #define SIMDE_MATH_DBL_MAX __DBL_MAX__ - #else - #if !defined(DBL_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MAX DBL_MAX - #endif -#endif - -/*** Classification macros from C99 ***/ - -#if !defined(simde_math_isinf) - #if SIMDE_MATH_BUILTIN_LIBM(isinf) - #define simde_math_isinf(v) __builtin_isinf(v) - #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isinf(v) isinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinf(v) std::isinf(v) - #endif -#endif - -#if !defined(simde_math_isinff) - #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define simde_math_isinff(v) __builtin_isinff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinff(v) std::isinf(v) - #elif defined(simde_math_isinf) - #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnan) - #if SIMDE_MATH_BUILTIN_LIBM(isnan) - #define simde_math_isnan(v) __builtin_isnan(v) - #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnan(v) isnan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnan(v) std::isnan(v) - #endif -#endif - -#if !defined(simde_math_isnanf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ - #define simde_math_isnanf(v) __builtin_isnanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnanf(v) std::isnan(v) - #elif defined(simde_math_isnan) - #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnormal) - #if SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormal(v) __builtin_isnormal(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormal(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormal(v) std::isnormal(v) - #endif -#endif - -#if !defined(simde_math_isnormalf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) - #define simde_math_isnormalf(v) __builtin_isnormalf(v) - #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormalf(v) __builtin_isnormal(v) - #elif defined(isnormalf) - #define simde_math_isnormalf(v) isnormalf(v) - #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormalf(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormalf(v) std::isnormal(v) - #elif defined(simde_math_isnormal) - #define simde_math_isnormalf(v) 
simde_math_isnormal(v) - #endif -#endif - -#if !defined(simde_math_issubnormalf) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) - #endif -#endif - -#if !defined(simde_math_issubnormal) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormal(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) - #endif -#endif - -#if defined(FP_NAN) - #define SIMDE_MATH_FP_NAN FP_NAN -#else - #define SIMDE_MATH_FP_NAN 0 -#endif -#if defined(FP_INFINITE) - #define SIMDE_MATH_FP_INFINITE FP_INFINITE -#else - #define SIMDE_MATH_FP_INFINITE 1 -#endif -#if defined(FP_ZERO) - #define SIMDE_MATH_FP_ZERO FP_ZERO -#else - #define SIMDE_MATH_FP_ZERO 2 -#endif -#if defined(FP_SUBNORMAL) - #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL -#else - #define SIMDE_MATH_FP_SUBNORMAL 3 -#endif -#if defined(FP_NORMAL) - #define SIMDE_MATH_FP_NORMAL FP_NORMAL -#else - #define SIMDE_MATH_FP_NORMAL 4 -#endif - -static HEDLEY_INLINE -int -simde_math_fpclassifyf(float v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0f) ? SIMDE_MATH_FP_ZERO : - simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -static HEDLEY_INLINE -int -simde_math_fpclassify(double v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0) ? SIMDE_MATH_FP_ZERO : - simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -#define SIMDE_MATH_FP_QNAN 0x01 -#define SIMDE_MATH_FP_PZERO 0x02 -#define SIMDE_MATH_FP_NZERO 0x04 -#define SIMDE_MATH_FP_PINF 0x08 -#define SIMDE_MATH_FP_NINF 0x10 -#define SIMDE_MATH_FP_DENORMAL 0x20 -#define SIMDE_MATH_FP_NEGATIVE 0x40 -#define SIMDE_MATH_FP_SNAN 0x80 - -static HEDLEY_INLINE -uint8_t -simde_math_fpclassf(float v, const int imm8) { - union { - float f; - uint32_t u; - } fu; - fu.f = v; - uint32_t bits = fu.u; - uint8_t NegNum = (bits >> 31) & 1; - uint32_t const ExpMask = 0x3F800000; // [30:23] - uint32_t const MantMask = 0x007FFFFF; // [22:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 22) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -static HEDLEY_INLINE -uint8_t -simde_math_fpclass(double v, const int imm8) { - union { - double d; - uint64_t u; - } du; - du.d = v; - uint64_t bits = du.u; - uint8_t NegNum = (bits >> 63) & 1; - uint64_t const ExpMask = 0x3FF0000000000000; // [62:52] - uint64_t const MantMask = 0x000FFFFFFFFFFFFF; // [51:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 51) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -/*** Manipulation functions ***/ - -#if !defined(simde_math_nextafter) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafter(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafter(x, y) 
nextafter(x, y) - #endif -#endif - -#if !defined(simde_math_nextafterf) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafterf(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafterf(x, y) nextafterf(x, y) - #endif -#endif - -/*** Functions from C99 ***/ - -#if !defined(simde_math_abs) - #if SIMDE_MATH_BUILTIN_LIBM(abs) - #define simde_math_abs(v) __builtin_abs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_abs(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_abs(v) abs(v) - #endif -#endif - -#if !defined(simde_math_labs) - #if SIMDE_MATH_BUILTIN_LIBM(labs) - #define simde_math_labs(v) __builtin_labs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_labs(v) std::labs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_labs(v) labs(v) - #endif -#endif - -#if !defined(simde_math_llabs) - #if SIMDE_MATH_BUILTIN_LIBM(llabs) - #define simde_math_llabs(v) __builtin_llabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_llabs(v) std::llabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_llabs(v) llabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_acos) - #if SIMDE_MATH_BUILTIN_LIBM(acos) - #define simde_math_acos(v) __builtin_acos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acos(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acos(v) acos(v) - #endif -#endif - -#if !defined(simde_math_acosf) - #if SIMDE_MATH_BUILTIN_LIBM(acosf) - #define simde_math_acosf(v) __builtin_acosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosf(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosf(v) acosf(v) - #endif -#endif - -#if !defined(simde_math_acosh) - #if SIMDE_MATH_BUILTIN_LIBM(acosh) - #define simde_math_acosh(v) __builtin_acosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosh(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosh(v) acosh(v) - #endif -#endif - -#if !defined(simde_math_acoshf) - #if SIMDE_MATH_BUILTIN_LIBM(acoshf) - #define simde_math_acoshf(v) __builtin_acoshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acoshf(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acoshf(v) acoshf(v) - #endif -#endif - -#if !defined(simde_math_asin) - #if SIMDE_MATH_BUILTIN_LIBM(asin) - #define simde_math_asin(v) __builtin_asin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asin(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asin(v) asin(v) - #endif -#endif - -#if !defined(simde_math_asinf) - #if SIMDE_MATH_BUILTIN_LIBM(asinf) - #define simde_math_asinf(v) __builtin_asinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinf(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinf(v) asinf(v) - #endif -#endif - -#if 
!defined(simde_math_asinh) - #if SIMDE_MATH_BUILTIN_LIBM(asinh) - #define simde_math_asinh(v) __builtin_asinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinh(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinh(v) asinh(v) - #endif -#endif - -#if !defined(simde_math_asinhf) - #if SIMDE_MATH_BUILTIN_LIBM(asinhf) - #define simde_math_asinhf(v) __builtin_asinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinhf(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinhf(v) asinhf(v) - #endif -#endif - -#if !defined(simde_math_atan) - #if SIMDE_MATH_BUILTIN_LIBM(atan) - #define simde_math_atan(v) __builtin_atan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan(v) atan(v) - #endif -#endif - -#if !defined(simde_math_atan2) - #if SIMDE_MATH_BUILTIN_LIBM(atan2) - #define simde_math_atan2(y, x) __builtin_atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2(y, x) atan2(y, x) - #endif -#endif - -#if !defined(simde_math_atan2f) - #if SIMDE_MATH_BUILTIN_LIBM(atan2f) - #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2f(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2f(y, x) atan2f(y, x) - #endif -#endif - -#if !defined(simde_math_atanf) - #if SIMDE_MATH_BUILTIN_LIBM(atanf) - #define simde_math_atanf(v) __builtin_atanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanf(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanf(v) atanf(v) - #endif -#endif - -#if !defined(simde_math_atanh) - #if SIMDE_MATH_BUILTIN_LIBM(atanh) - #define simde_math_atanh(v) __builtin_atanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanh(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanh(v) atanh(v) - #endif -#endif - -#if !defined(simde_math_atanhf) - #if SIMDE_MATH_BUILTIN_LIBM(atanhf) - #define simde_math_atanhf(v) __builtin_atanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanhf(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanhf(v) atanhf(v) - #endif -#endif - -#if !defined(simde_math_cbrt) - #if SIMDE_MATH_BUILTIN_LIBM(cbrt) - #define simde_math_cbrt(v) __builtin_cbrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrt(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrt(v) cbrt(v) - #endif -#endif - -#if !defined(simde_math_cbrtf) - #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) - #define simde_math_cbrtf(v) __builtin_cbrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrtf(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrtf(v) cbrtf(v) - #endif -#endif - -#if !defined(simde_math_ceil) - #if SIMDE_MATH_BUILTIN_LIBM(ceil) - #define simde_math_ceil(v) __builtin_ceil(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceil(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_ceil(v) ceil(v) - #endif -#endif - -#if !defined(simde_math_ceilf) - #if SIMDE_MATH_BUILTIN_LIBM(ceilf) - #define simde_math_ceilf(v) __builtin_ceilf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceilf(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) 
- #define simde_math_ceilf(v) ceilf(v) - #endif -#endif - -#if !defined(simde_math_copysign) - #if SIMDE_MATH_BUILTIN_LIBM(copysign) - #define simde_math_copysign(x, y) __builtin_copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysign(x, y) std::copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysign(x, y) copysign(x, y) - #endif -#endif - -#if !defined(simde_math_copysignf) - #if SIMDE_MATH_BUILTIN_LIBM(copysignf) - #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysignf(x, y) std::copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysignf(x, y) copysignf(x, y) - #endif -#endif - -#if !defined(simde_math_signbit) - #if SIMDE_MATH_BUILTIN_LIBM(signbit) - #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - #define simde_math_signbit(x) __builtin_signbit(x) - #else - #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) - #endif - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_signbit(x) std::signbit(x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_signbit(x) signbit(x) - #endif -#endif - -#if !defined(simde_math_cos) - #if SIMDE_MATH_BUILTIN_LIBM(cos) - #define simde_math_cos(v) __builtin_cos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cos(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cos(v) cos(v) - #endif -#endif - -#if !defined(simde_math_cosf) - #if defined(SIMDE_MATH_SLEEF_ENABLE) - #if SIMDE_ACCURACY_PREFERENCE < 1 - #define simde_math_cosf(v) Sleef_cosf_u35(v) - #else - #define simde_math_cosf(v) Sleef_cosf_u10(v) - #endif - #elif SIMDE_MATH_BUILTIN_LIBM(cosf) - #define simde_math_cosf(v) __builtin_cosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosf(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosf(v) cosf(v) - #endif -#endif - -#if !defined(simde_math_cosh) - #if SIMDE_MATH_BUILTIN_LIBM(cosh) - #define simde_math_cosh(v) __builtin_cosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosh(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosh(v) cosh(v) - #endif -#endif - -#if !defined(simde_math_coshf) - #if SIMDE_MATH_BUILTIN_LIBM(coshf) - #define simde_math_coshf(v) __builtin_coshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_coshf(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_coshf(v) coshf(v) - #endif -#endif - -#if !defined(simde_math_erf) - #if SIMDE_MATH_BUILTIN_LIBM(erf) - #define simde_math_erf(v) __builtin_erf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erf(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erf(v) erf(v) - #endif -#endif - -#if !defined(simde_math_erff) - #if SIMDE_MATH_BUILTIN_LIBM(erff) - #define simde_math_erff(v) __builtin_erff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erff(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erff(v) erff(v) - #endif -#endif - -#if !defined(simde_math_erfc) - #if SIMDE_MATH_BUILTIN_LIBM(erfc) - #define simde_math_erfc(v) __builtin_erfc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfc(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfc(v) erfc(v) - #endif -#endif - -#if !defined(simde_math_erfcf) - #if SIMDE_MATH_BUILTIN_LIBM(erfcf) - #define simde_math_erfcf(v) 
__builtin_erfcf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfcf(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfcf(v) erfcf(v) - #endif -#endif - -#if !defined(simde_math_exp) - #if SIMDE_MATH_BUILTIN_LIBM(exp) - #define simde_math_exp(v) __builtin_exp(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp(v) exp(v) - #endif -#endif - -#if !defined(simde_math_expf) - #if SIMDE_MATH_BUILTIN_LIBM(expf) - #define simde_math_expf(v) __builtin_expf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expf(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expf(v) expf(v) - #endif -#endif - -#if !defined(simde_math_expm1) - #if SIMDE_MATH_BUILTIN_LIBM(expm1) - #define simde_math_expm1(v) __builtin_expm1(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1(v) expm1(v) - #endif -#endif - -#if !defined(simde_math_expm1f) - #if SIMDE_MATH_BUILTIN_LIBM(expm1f) - #define simde_math_expm1f(v) __builtin_expm1f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1f(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1f(v) expm1f(v) - #endif -#endif - -#if !defined(simde_math_exp2) - #if SIMDE_MATH_BUILTIN_LIBM(exp2) - #define simde_math_exp2(v) __builtin_exp2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2(v) exp2(v) - #endif -#endif - -#if !defined(simde_math_exp2f) - #if SIMDE_MATH_BUILTIN_LIBM(exp2f) - #define simde_math_exp2f(v) __builtin_exp2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2f(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2f(v) exp2f(v) - #endif -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10(v) __builtin_exp10(v) -#else -# define simde_math_exp10(v) pow(10.0, (v)) -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10f(v) __builtin_exp10f(v) -#else -# define simde_math_exp10f(v) powf(10.0f, (v)) -#endif - -#if !defined(simde_math_fabs) - #if SIMDE_MATH_BUILTIN_LIBM(fabs) - #define simde_math_fabs(v) __builtin_fabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabs(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabs(v) fabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_floor) - #if SIMDE_MATH_BUILTIN_LIBM(floor) - #define simde_math_floor(v) __builtin_floor(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floor(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floor(v) floor(v) - #endif -#endif - -#if !defined(simde_math_floorf) - #if SIMDE_MATH_BUILTIN_LIBM(floorf) - #define simde_math_floorf(v) __builtin_floorf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floorf(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floorf(v) floorf(v) - #endif -#endif - -#if 
!defined(simde_math_fma) - #if SIMDE_MATH_BUILTIN_LIBM(fma) - #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fma(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fma(x, y, z) fma(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmaf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaf) - #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaf(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaf(x, y, z) fmaf(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmax) - #if SIMDE_MATH_BUILTIN_LIBM(fmax) - #define simde_math_fmax(x, y) __builtin_fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmax(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmax(x, y) fmax(x, y) - #endif -#endif - -#if !defined(simde_math_fmaxf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) - #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaxf(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaxf(x, y) fmaxf(x, y) - #endif -#endif - -#if !defined(simde_math_hypot) - #if SIMDE_MATH_BUILTIN_LIBM(hypot) - #define simde_math_hypot(y, x) __builtin_hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypot(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypot(y, x) hypot(y, x) - #endif -#endif - -#if !defined(simde_math_hypotf) - #if SIMDE_MATH_BUILTIN_LIBM(hypotf) - #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypotf(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypotf(y, x) hypotf(y, x) - #endif -#endif - -#if !defined(simde_math_log) - #if SIMDE_MATH_BUILTIN_LIBM(log) - #define simde_math_log(v) __builtin_log(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log(v) log(v) - #endif -#endif - -#if !defined(simde_math_logf) - #if SIMDE_MATH_BUILTIN_LIBM(logf) - #define simde_math_logf(v) __builtin_logf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logf(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logf(v) logf(v) - #endif -#endif - -#if !defined(simde_math_logb) - #if SIMDE_MATH_BUILTIN_LIBM(logb) - #define simde_math_logb(v) __builtin_logb(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logb(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logb(v) logb(v) - #endif -#endif - -#if !defined(simde_math_logbf) - #if SIMDE_MATH_BUILTIN_LIBM(logbf) - #define simde_math_logbf(v) __builtin_logbf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logbf(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logbf(v) logbf(v) - #endif -#endif - -#if !defined(simde_math_log1p) - #if SIMDE_MATH_BUILTIN_LIBM(log1p) - #define simde_math_log1p(v) __builtin_log1p(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log1p(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1p(v) log1p(v) - #endif -#endif - -#if !defined(simde_math_log1pf) - #if SIMDE_MATH_BUILTIN_LIBM(log1pf) - #define simde_math_log1pf(v) __builtin_log1pf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define 
simde_math_log1pf(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1pf(v) log1pf(v) - #endif -#endif - -#if !defined(simde_math_log2) - #if SIMDE_MATH_BUILTIN_LIBM(log2) - #define simde_math_log2(v) __builtin_log2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2(v) log2(v) - #endif -#endif - -#if !defined(simde_math_log2f) - #if SIMDE_MATH_BUILTIN_LIBM(log2f) - #define simde_math_log2f(v) __builtin_log2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2f(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2f(v) log2f(v) - #endif -#endif - -#if !defined(simde_math_log10) - #if SIMDE_MATH_BUILTIN_LIBM(log10) - #define simde_math_log10(v) __builtin_log10(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10(v) log10(v) - #endif -#endif - -#if !defined(simde_math_log10f) - #if SIMDE_MATH_BUILTIN_LIBM(log10f) - #define simde_math_log10f(v) __builtin_log10f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10f(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10f(v) log10f(v) - #endif -#endif - -#if !defined(simde_math_modf) - #if SIMDE_MATH_BUILTIN_LIBM(modf) - #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modf(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modf(x, iptr) modf(x, iptr) - #endif -#endif - -#if !defined(simde_math_modff) - #if SIMDE_MATH_BUILTIN_LIBM(modff) - #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modff(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modff(x, iptr) modff(x, iptr) - #endif -#endif - -#if !defined(simde_math_nearbyint) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) - #define simde_math_nearbyint(v) __builtin_nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyint(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyint(v) nearbyint(v) - #endif -#endif - -#if !defined(simde_math_nearbyintf) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) - #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyintf(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyintf(v) nearbyintf(v) - #endif -#endif - -#if !defined(simde_math_pow) - #if SIMDE_MATH_BUILTIN_LIBM(pow) - #define simde_math_pow(y, x) __builtin_pow(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_pow(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_pow(y, x) pow(y, x) - #endif -#endif - -#if !defined(simde_math_powf) - #if SIMDE_MATH_BUILTIN_LIBM(powf) - #define simde_math_powf(y, x) __builtin_powf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_powf(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_powf(y, x) powf(y, x) - #endif -#endif - -#if !defined(simde_math_rint) - #if SIMDE_MATH_BUILTIN_LIBM(rint) - #define simde_math_rint(v) __builtin_rint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rint(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rint(v) rint(v) - #endif 
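Every entry point in the removed simde-math.h follows the same three-tier fallback visible in the hunks above and below: prefer a compiler builtin, else the C++ <cmath> overload, else the plain C <math.h> function. A minimal standalone sketch of that dispatch pattern, using an illustrative my_math_round wrapper rather than simde's own SIMDE_MATH_* macros:

#include <math.h>        /* provides round() for the plain-C branch */
#if defined(__cplusplus)
#include <cmath>         /* provides std::round for the C++ branch */
#endif

#if !defined(my_math_round)
  #if defined(__GNUC__) || defined(__clang__)
    /* compiler builtin: no libm call needed, may be folded at compile time */
    #define my_math_round(v) __builtin_round(v)
  #elif defined(__cplusplus)
    /* C++ translation unit: use the <cmath> overload set */
    #define my_math_round(v) std::round(v)
  #else
    /* plain C: fall back to the libm function */
    #define my_math_round(v) round(v)
  #endif
#endif

The builtin branch lets the compiler constant-fold the call, and checking the C++ branch before the C one mirrors the removed file's ordering of SIMDE_MATH_HAVE_CMATH ahead of SIMDE_MATH_HAVE_MATH_H, so the header stays usable from both C and C++ translation units.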
-#endif - -#if !defined(simde_math_rintf) - #if SIMDE_MATH_BUILTIN_LIBM(rintf) - #define simde_math_rintf(v) __builtin_rintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rintf(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rintf(v) rintf(v) - #endif -#endif - -#if !defined(simde_math_round) - #if SIMDE_MATH_BUILTIN_LIBM(round) - #define simde_math_round(v) __builtin_round(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_round(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_round(v) round(v) - #endif -#endif - -#if !defined(simde_math_roundf) - #if SIMDE_MATH_BUILTIN_LIBM(roundf) - #define simde_math_roundf(v) __builtin_roundf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_roundf(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_roundf(v) roundf(v) - #endif -#endif - -#if !defined(simde_math_roundeven) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundeven(v) __builtin_roundeven(v) - #elif defined(simde_math_round) && defined(simde_math_fabs) - static HEDLEY_INLINE - double - simde_math_roundeven(double v) { - double rounded = simde_math_round(v); - double diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundeven simde_math_roundeven - #endif -#endif - -#if !defined(simde_math_roundevenf) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundevenf(v) __builtin_roundevenf(v) - #elif defined(simde_math_roundf) && defined(simde_math_fabsf) - static HEDLEY_INLINE - float - simde_math_roundevenf(float v) { - float rounded = simde_math_roundf(v); - float diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundevenf simde_math_roundevenf - #endif -#endif - -#if !defined(simde_math_sin) - #if SIMDE_MATH_BUILTIN_LIBM(sin) - #define simde_math_sin(v) __builtin_sin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sin(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sin(v) sin(v) - #endif -#endif - -#if !defined(simde_math_sinf) - #if SIMDE_MATH_BUILTIN_LIBM(sinf) - #define simde_math_sinf(v) __builtin_sinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinf(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinf(v) sinf(v) - #endif -#endif - -#if !defined(simde_math_sinh) - #if SIMDE_MATH_BUILTIN_LIBM(sinh) - #define simde_math_sinh(v) __builtin_sinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinh(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinh(v) sinh(v) - #endif -#endif - -#if !defined(simde_math_sinhf) - #if SIMDE_MATH_BUILTIN_LIBM(sinhf) - #define simde_math_sinhf(v) __builtin_sinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinhf(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinhf(v) sinhf(v) - #endif -#endif - -#if !defined(simde_math_sqrt) - #if SIMDE_MATH_BUILTIN_LIBM(sqrt) - #define simde_math_sqrt(v) __builtin_sqrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrt(v) 
std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrt(v) sqrt(v) - #endif -#endif - -#if !defined(simde_math_sqrtf) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) - #define simde_math_sqrtf(v) __builtin_sqrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtf(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtf(v) sqrtf(v) - #endif -#endif - -#if !defined(simde_math_sqrtl) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtl) - #define simde_math_sqrtl(v) __builtin_sqrtl(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtl(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtl(v) sqrtl(v) - #endif -#endif - -#if !defined(simde_math_tan) - #if SIMDE_MATH_BUILTIN_LIBM(tan) - #define simde_math_tan(v) __builtin_tan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tan(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tan(v) tan(v) - #endif -#endif - -#if !defined(simde_math_tanf) - #if SIMDE_MATH_BUILTIN_LIBM(tanf) - #define simde_math_tanf(v) __builtin_tanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanf(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanf(v) tanf(v) - #endif -#endif - -#if !defined(simde_math_tanh) - #if SIMDE_MATH_BUILTIN_LIBM(tanh) - #define simde_math_tanh(v) __builtin_tanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanh(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanh(v) tanh(v) - #endif -#endif - -#if !defined(simde_math_tanhf) - #if SIMDE_MATH_BUILTIN_LIBM(tanhf) - #define simde_math_tanhf(v) __builtin_tanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanhf(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanhf(v) tanhf(v) - #endif -#endif - -#if !defined(simde_math_trunc) - #if SIMDE_MATH_BUILTIN_LIBM(trunc) - #define simde_math_trunc(v) __builtin_trunc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_trunc(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_trunc(v) trunc(v) - #endif -#endif - -#if !defined(simde_math_truncf) - #if SIMDE_MATH_BUILTIN_LIBM(truncf) - #define simde_math_truncf(v) __builtin_truncf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_truncf(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_truncf(v) truncf(v) - #endif -#endif - -/*** Comparison macros (which don't raise invalid errors) ***/ - -#if defined(isunordered) - #define simde_math_isunordered(x, y) isunordered(x, y) -#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) - #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) -#else - static HEDLEY_INLINE - int simde_math_isunordered(double x, double y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunordered simde_math_isunordered - - static HEDLEY_INLINE - int simde_math_isunorderedf(float x, float y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunorderedf simde_math_isunorderedf -#endif -#if !defined(simde_math_isunorderedf) - #define simde_math_isunorderedf simde_math_isunordered -#endif - -/*** Additional functions not in libm ***/ - -#if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) - static HEDLEY_INLINE - double - simde_math_cdfnorm(double x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const double a1 = 0.254829592; - static const double a2 = -0.284496736; 
- static const double a3 = 1.421413741; - static const double a4 = -1.453152027; - static const double a5 = 1.061405429; - static const double p = 0.3275911; - - const int sign = x < 0; - x = simde_math_fabs(x) / simde_math_sqrt(2.0); - - /* A&S formula 7.1.26 */ - double t = 1.0 / (1.0 + p * x); - double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); - - return 0.5 * (1.0 + (sign ? -y : y)); - } - #define simde_math_cdfnorm simde_math_cdfnorm -#endif - -#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) - static HEDLEY_INLINE - float - simde_math_cdfnormf(float x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const float a1 = 0.254829592f; - static const float a2 = -0.284496736f; - static const float a3 = 1.421413741f; - static const float a4 = -1.453152027f; - static const float a5 = 1.061405429f; - static const float p = 0.3275911f; - - const int sign = x < 0; - x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); - - /* A&S formula 7.1.26 */ - float t = 1.0f / (1.0f + p * x); - float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); - - return 0.5f * (1.0f + (sign ? -y : y)); - } - #define simde_math_cdfnormf simde_math_cdfnormf -#endif - -#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) - /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ - static HEDLEY_INLINE - double - simde_math_cdfnorminv(double p) { - static const double a[6] = { - -3.969683028665376e+01, - 2.209460984245205e+02, - -2.759285104469687e+02, - 1.383577518672690e+02, - -3.066479806614716e+01, - 2.506628277459239e+00 - }; - - static const double b[5] = { - -5.447609879822406e+01, - 1.615858368580409e+02, - -1.556989798598866e+02, - 6.680131188771972e+01, - -1.328068155288572e+01 - }; - - static const double c[6] = { - -7.784894002430293e-03, - -3.223964580411365e-01, - -2.400758277161838e+00, - -2.549732539343734e+00, - 4.374664141464968e+00, - 2.938163982698783e+00 - }; - - static const double d[4] = { - 7.784695709041462e-03, - 3.224671290700398e-01, - 2.445134137142996e+00, - 3.754408661907416e+00 - }; - - static const double low = 0.02425; - static const double high = 0.97575; - double q, r; - - if (p < 0 || p > 1) { - return 0.0; - } else if (p == 0) { - return -SIMDE_MATH_INFINITY; - } else if (p == 1) { - return SIMDE_MATH_INFINITY; - } else if (p < low) { - q = simde_math_sqrt(-2.0 * simde_math_log(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } -} -#define simde_math_cdfnorminv simde_math_cdfnorminv -#endif - -#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_cdfnorminvf(float p) { - static const float a[6] = { - -3.969683028665376e+01f, - 2.209460984245205e+02f, - -2.759285104469687e+02f, - 1.383577518672690e+02f, - -3.066479806614716e+01f, - 
2.506628277459239e+00f - }; - static const float b[5] = { - -5.447609879822406e+01f, - 1.615858368580409e+02f, - -1.556989798598866e+02f, - 6.680131188771972e+01f, - -1.328068155288572e+01f - }; - static const float c[6] = { - -7.784894002430293e-03f, - -3.223964580411365e-01f, - -2.400758277161838e+00f, - -2.549732539343734e+00f, - 4.374664141464968e+00f, - 2.938163982698783e+00f - }; - static const float d[4] = { - 7.784695709041462e-03f, - 3.224671290700398e-01f, - 2.445134137142996e+00f, - 3.754408661907416e+00f - }; - static const float low = 0.02425f; - static const float high = 0.97575f; - float q, r; - - if (p < 0 || p > 1) { - return 0.0f; - } else if (p == 0) { - return -SIMDE_MATH_INFINITYF; - } else if (p == 1) { - return SIMDE_MATH_INFINITYF; - } else if (p < low) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5f; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } - } - #define simde_math_cdfnorminvf simde_math_cdfnorminvf -#endif - -#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfinv(double x) { - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c - * - * The original answer on SO uses a constant of 0.147, but in my - * testing 0.14829094707965850830078125 gives a lower average absolute error - * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). - * That said, if your goal is to minimize the *maximum* absolute - * error, 0.15449436008930206298828125 provides significantly better - * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); - } - #define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfinvf(float x) { - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); - } - #define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfcinv(double x) { - if(x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - static const double p[6] = { - 0.1550470003116, - 1.382719649631, - 0.690969348887, - -1.128081391617, - 0.680544246825, - -0.16444156791 - }; - static const double q[3] = { - 0.155024849822, - 1.385228141995, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - static const double p[4] = { - 0.00980456202915, - 0.363667889171, - 0.97302949837, - -0.5374947401 - }; - static const double q[3] = { - 0.00980451277802, - 0.363699971544, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } - } - - #define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfcinvf(float x) { - if(x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = { - 0.1550470003116f, - 1.382719649631f, - 0.690969348887f, - -1.128081391617f, - 0.680544246825f - -0.164441567910f - }; - static const float q[3] = { - 0.155024849822f, - 1.385228141995f, - 1.000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = { - 0.00980456202915f, - 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f - }; - static const float q[3] = { - 0.00980451277802f, - 0.36369997154400f, - 1.00000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; - } - } - - #define simde_math_erfcinvf simde_math_erfcinvf -#endif - -static HEDLEY_INLINE -double -simde_math_rad2deg(double radians) { - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE -float -simde_math_rad2degf(float radians) { - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE -double -simde_math_deg2rad(double degrees) { - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE -float -simde_math_deg2radf(float degrees) { - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE -int8_t -simde_math_adds_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); - #else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_adds_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_adds_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_adds_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_adds_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); - #else - uint8_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_adds_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); - #else - uint16_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_adds_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); - #else - uint32_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_adds_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); - #else - uint64_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -int8_t -simde_math_subs_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); - #else - uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - case 32: result = func_name(__VA_ARGS__, 32); break; \ - case 33: result = func_name(__VA_ARGS__, 33); break; \ - case 34: result = func_name(__VA_ARGS__, 34); break; \ - case 35: result = func_name(__VA_ARGS__, 35); break; \ - case 36: result = func_name(__VA_ARGS__, 36); break; \ - case 37: result = func_name(__VA_ARGS__, 37); break; \ - case 38: result = func_name(__VA_ARGS__, 38); break; \ - case 39: result = func_name(__VA_ARGS__, 39); break; \ - case 40: result = func_name(__VA_ARGS__, 40); break; \ - case 41: result = func_name(__VA_ARGS__, 41); break; \ - case 42: result = func_name(__VA_ARGS__, 42); break; \ - case 43: result = func_name(__VA_ARGS__, 43); break; \ - case 44: result = func_name(__VA_ARGS__, 44); break; \ - case 45: result = func_name(__VA_ARGS__, 45); break; \ - case 46: result = func_name(__VA_ARGS__, 46); break; \ - case 47: result = func_name(__VA_ARGS__, 47); break; \ - case 48: result = func_name(__VA_ARGS__, 48); break; \ - case 49: result = func_name(__VA_ARGS__, 49); break; \ - case 50: result = func_name(__VA_ARGS__, 50); break; \ - case 51: result = func_name(__VA_ARGS__, 51); break; \ - case 52: result = func_name(__VA_ARGS__, 52); break; \ - case 53: result = func_name(__VA_ARGS__, 53); break; \ - case 54: result = func_name(__VA_ARGS__, 54); break; \ - case 55: result = func_name(__VA_ARGS__, 55); break; \ - case 56: result = func_name(__VA_ARGS__, 56); break; \ - case 57: result = func_name(__VA_ARGS__, 57); break; \ - case 58: result = func_name(__VA_ARGS__, 58); break; \ - case 59: result = func_name(__VA_ARGS__, 59); break; \ - case 60: result = func_name(__VA_ARGS__, 60); break; \ - case 61: result = func_name(__VA_ARGS__, 61); break; \ - case 62: 
result = func_name(__VA_ARGS__, 62); break; \ - case 63: result = func_name(__VA_ARGS__, 63); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
[... remainder of the vendored SIMDe ("SIMD Everywhere") headers deleted; every line in this
region is a removal. The elided hunks cover:
  - the rest of simde/simde-constify.h (the SIMDE_CONSTIFY_* switch-dispatch macros);
  - all of simde/simde-align.h (Evan Nemerson's CC0 alignment-portability macros:
    SIMDE_ALIGN_OF, SIMDE_ALIGN_TO, SIMDE_ALIGN_ASSUME_TO, SIMDE_ALIGN_LIKE, SIMDE_ALIGN_CAST);
  - the SIMDe common portability layer (SIMDE_FAST_* math trade-off options, GCC
    vector-extension and OpenMP/Cilk vectorization detection, endianness detection and
    simde_bswap64, the simde_float32/simde_float64 and poly typedefs, the
    simde_memcpy/simde_memset/simde_memcmp fallbacks, and the NaN-quieting helpers);
  - simde/check.h (the simde_assert_* macros) and simde/debug-trap.h (simde_trap);
  - the fixed-width-integer to __builtin_* suffix mapping, ending mid-definition of
    SIMDE_GENERIC_ ...]
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. 
- * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - #include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; - #endif - - #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; - #endif - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; - #endif - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; - #endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde__m64_from_private(simde__m64_private v) { - simde__m64 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64_private -simde__m64_to_private(simde__m64 v) { - simde__m64_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ - SIMDE_FUNCTION_ATTRIBUTES \ - simde__##simde_type \ - simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ - simde__##simde_type##_private r_; \ - r_.isax##_##fragment = value; \ - return simde__##simde_type##_from_private(r_); \ - } \ - \ - SIMDE_FUNCTION_ATTRIBUTES \ - source_type \ - simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ - simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ - return r_.isax##_##fragment; \ - } - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) -#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) -# define _m_paddb(a, b) simde_m_paddb(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_add_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) -# define _m_paddw(a, b) simde_mm_add_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) -# define _m_paddd(a, b) simde_mm_add_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { - r_.i8[i] = INT8_MAX; - } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { - r_.i8[i] = INT8_MIN; - } else { - r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) -# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, 
b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) -# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_and_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - r_.i64[0] = a_.i64[0] & b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -# define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = 
simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) -# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) -# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) -# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) -# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) -# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) -# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtm64_si64 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtm64_si64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i64[0]; - #endif - #endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -# define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi32_si64 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[2] = { a, 0 }; - r_.neon_i32 = vld1_s32(av); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -# define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi64_m64 (int64_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtsi64_m64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); - #else - r_.i64[0] = a; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi64_si32 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_empty (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); - #else - /* noop */ - #endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_empty() simde_mm_empty() -# define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_or_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - r_.i64[0] = a_.i64[0] | b_.i64[0]; 
- #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -# define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to 
UINT8_MAX */ - const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); - - /* Elements which are within the acceptable range */ - const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); - const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); - - /* Final values as 16-bit integers */ - const int16x8_t values = vorrq_s16(le_max, gt_max); - - r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else if (a_.i16[i] < 0) { - r_.u8[i] = 0; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] > UINT8_MAX) { - r_.u8[i + 4] = UINT8_MAX; - } else if (b_.i16[i] < 0) { - r_.u8[i + 4] = 0; - } else { - r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) -# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i8 = vld1_s8(v); - #else - r_.i8[0] = e0; - r_.i8[1] = e1; - r_.i8[2] = e2; - r_.i8[3] = e3; - r_.i8[4] = e4; - r_.i8[5] = e5; - r_.i8[6] = e6; - r_.i8[7] = e7; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m64_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi8( - HEDLEY_STATIC_CAST(int8_t, e7), - HEDLEY_STATIC_CAST(int8_t, e6), - HEDLEY_STATIC_CAST(int8_t, e5), - HEDLEY_STATIC_CAST(int8_t, e4), - HEDLEY_STATIC_CAST(int8_t, e3), - HEDLEY_STATIC_CAST(int8_t, e2), - HEDLEY_STATIC_CAST(int8_t, e1), - HEDLEY_STATIC_CAST(int8_t, e0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u8 = vld1_u8(v); - #else - r_.u8[0] = e0; - r_.u8[1] = e1; - r_.u8[2] = e2; - r_.u8[3] = e3; - r_.u8[4] = e4; - r_.u8[5] = e5; - r_.u8[6] = e6; - r_.u8[7] = e7; - #endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi16(e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; - r_.neon_i16 = vld1_s16(v); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - #endif - - return 
simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi16( - HEDLEY_STATIC_CAST(int16_t, e3), - HEDLEY_STATIC_CAST(int16_t, e2), - HEDLEY_STATIC_CAST(int16_t, e1), - HEDLEY_STATIC_CAST(int16_t, e0) - ); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; - r_.neon_u16 = vld1_u16(v); -#else - r_.u16[0] = e0; - r_.u16[1] = e1; - r_.u16[2] = e2; - r_.u16[3] = e3; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32( - HEDLEY_STATIC_CAST(int32_t, e1), - HEDLEY_STATIC_CAST(int32_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; - r_.neon_u32 = vld1_u32(v); -#else - r_.u32[0] = e0; - r_.u32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi32 (int32_t e1, int32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32(e1, e0); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; - r_.neon_i32 = vld1_s32(v); -#else - r_.i32[0] = e0; - r_.i32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pi64 (int64_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; - r_.neon_i64 = vld1_s64(v); -#else - r_.i64[0] = e0; -#endif - - return simde__m64_from_private(r_); -} - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; - r_.neon_f32 = vld1_f32(v); -#else - r_.f32[0] = e0; - r_.f32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi8 (int8_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi8(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i8 = vmov_n_s8(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi8(a, a, a, a, a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi16 (int16_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi16(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i16 = vmov_n_s16(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi16(a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi32 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi32(a); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i32 = vmov_n_s32(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi16(e3, e2, e1, e0); - #else - return simde_mm_set_pi16(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi32 (int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi32(e1, e0); - #else - return simde_mm_set_pi32(e0, e1); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setzero_si64 (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setzero_si64(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_u32 = vmov_n_u32(0); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(0, 0); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_si64() simde_mm_setzero_si64() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_load_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_loadu_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(mem_addr, &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_setone_si64 (void) { - return simde_mm_set1_pi32(~INT32_C(0)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) 
- return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) -# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) -# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psllh_s(a_.mmi_i16, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) -# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi32(a, count); - #else - simde__m64_private r_; - 
simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) -# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_slli_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); - #else - r_.u64[0] = a_.u64[0] << count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) -# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 << count_.i64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] << count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) -# define _m_psllq(a, count) simde_mm_sll_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) - return simde_mm_setzero_si64(); - - r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { - r_.u16[i] = a_.u16[i] >> count_.u64[0]; - } - 
#endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) -# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { - r_.u32[i] = a_.u32[i] >> count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) -# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) -# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) -# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_si64(a, count); 
- #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> count; - #else - r_.u64[0] = a_.u64[0] >> count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) -# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 >> count_.u64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] >> count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) -# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrah_s(a_.mmi_i16, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) -# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psraw_s(a_.mmi_i32, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) 
-# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) -# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int32_t cnt = (count_.u64[0] > 31) ? 31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) -# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) -# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); - #elif 
defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) -# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) -# define _m_psubd(a, b) simde_mm_sub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { - r_.i8[i] = INT8_MIN; - } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) -# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const int32_t x = a_.u8[i] - b_.u8[i]; - if (x < 0) { - r_.u8[i] = 0; - } else if (x > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) -# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { - r_.i16[i] = SHRT_MIN; - } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) -# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - const int x = a_.u16[i] - b_.u16[i]; - if (x < 0) { - r_.u16[i] = 0; - } else if (x > UINT16_MAX) { - r_.u16[i] = UINT16_MAX; - } else { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) -# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); - #else - r_.i8[0] = a_.i8[4]; - r_.i8[1] = b_.i8[4]; - r_.i8[2] = a_.i8[5]; - r_.i8[3] = b_.i8[5]; - r_.i8[4] = a_.i8[6]; - r_.i8[5] = b_.i8[6]; - r_.i8[6] = a_.i8[7]; - r_.i8[7] = b_.i8[7]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) -# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); - #else - r_.i16[0] = a_.i16[2]; - r_.i16[1] = b_.i16[2]; - r_.i16[2] = a_.i16[3]; - r_.i16[3] = b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) -# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[1]; - r_.i32[1] = b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) -# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); - #else - r_.i8[0] = a_.i8[0]; - r_.i8[1] = b_.i8[0]; - r_.i8[2] = a_.i8[1]; - r_.i8[3] = b_.i8[1]; - r_.i8[4] = a_.i8[2]; - r_.i8[5] = b_.i8[2]; - r_.i8[6] = a_.i8[3]; - r_.i8[7] = b_.i8[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) -# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = 
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); - #else - r_.i16[0] = a_.i16[0]; - r_.i16[1] = b_.i16[0]; - r_.i16[2] = a_.i16[1]; - r_.i16[3] = b_.i16[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) -# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); - #else - r_.i32[0] = a_.i32[0]; - r_.i32[1] = b_.i32[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) -# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_xor_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - r_.u64[0] = a_.u64[0] ^ b_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) -# define _m_pxor(a, b) simde_mm_xor_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_m_to_int (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _m_to_int(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _m_to_int(a) simde_m_to_int(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_MMX_H) */ -/* :: End simde/x86/mmx.h :: */ diff --git a/src/simde/x86/sse.h b/src/simde/x86/sse.h deleted file mode 100644 index 378299a63..000000000 --- a/src/simde/x86/sse.h +++ /dev/null @@ -1,16074 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, 
copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - */ - -#if !defined(SIMDE_X86_SSE_H) -#define SIMDE_X86_SSE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/mmx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_MMX_H) -#define SIMDE_X86_MMX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-common.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_COMMON_H) -#define SIMDE_COMMON_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/hedley.h :: */ -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) -#if defined(HEDLEY_VERSION) -# undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 16 - -#if defined(HEDLEY_STRINGIFY_EX) -# undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -# undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -# undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(HEDLEY_CONCAT) -# undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) - -#if defined(HEDLEY_CONCAT3_EX) -# undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(HEDLEY_CONCAT3) -# undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(HEDLEY_VERSION_ENCODE) -# undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -# undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -# undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -# undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -# undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -# undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -# undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 
10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -# undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(HEDLEY_INTEL_VERSION) -# undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -# undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -# undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) -# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -# undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -# undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -# undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -# undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif 
defined(__SUNPRO_CC) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -# undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_ARM_VERSION) -# undef HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(HEDLEY_ARM_VERSION_CHECK) -# undef HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(HEDLEY_ARM_VERSION) -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IBM_VERSION) -# undef HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(HEDLEY_IBM_VERSION_CHECK) -# undef HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(HEDLEY_IBM_VERSION) -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_VERSION) -# undef HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -# if (__TI_COMPILER_VERSION__ >= 16000000) -# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -# endif -#endif - -#if defined(HEDLEY_TI_VERSION_CHECK) -# undef HEDLEY_TI_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_VERSION) -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION) -# undef HEDLEY_TI_CL2000_VERSION -#endif -#if 
defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) -# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) -# undef HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL2000_VERSION) -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION) -# undef HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) -# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION_CHECK) -# undef HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL430_VERSION) -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION) -# undef HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) -# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) -# undef HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_ARMCL_VERSION) -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION) -# undef HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) -# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) -# undef HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL6X_VERSION) -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION) -# undef HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) -# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) -# undef HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL7X_VERSION) -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION) -# undef HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) -# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if 
defined(HEDLEY_TI_CLPRU_VERSION_CHECK) -# undef HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CLPRU_VERSION) -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_CRAY_VERSION) -# undef HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) -# if defined(_RELEASE_PATCHLEVEL) -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) -# else -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) -# endif -#endif - -#if defined(HEDLEY_CRAY_VERSION_CHECK) -# undef HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(HEDLEY_CRAY_VERSION) -# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IAR_VERSION) -# undef HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) -# if __VER__ > 1000 -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) -# else -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) -# endif -#endif - -#if defined(HEDLEY_IAR_VERSION_CHECK) -# undef HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(HEDLEY_IAR_VERSION) -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TINYC_VERSION) -# undef HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) -# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) -#endif - -#if defined(HEDLEY_TINYC_VERSION_CHECK) -# undef HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_DMC_VERSION) -# undef HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) -# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if defined(HEDLEY_DMC_VERSION_CHECK) -# undef HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(HEDLEY_DMC_VERSION) -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION) -# undef HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) -# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION_CHECK) -# undef HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(HEDLEY_COMPCERT_VERSION) -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PELLES_VERSION) -# undef HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) -# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if 
defined(HEDLEY_PELLES_VERSION_CHECK) -# undef HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(HEDLEY_PELLES_VERSION) -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION) -# undef HEDLEY_MCST_LCC_VERSION -#endif -#if defined(__LCC__) && defined(__LCC_MINOR__) -# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) -# undef HEDLEY_MCST_LCC_VERSION_CHECK -#endif -#if defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_GCC_VERSION) -# undef HEDLEY_GCC_VERSION -#endif -#if \ - defined(HEDLEY_GNUC_VERSION) && \ - !defined(__clang__) && \ - !defined(HEDLEY_INTEL_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_ARM_VERSION) && \ - !defined(HEDLEY_CRAY_VERSION) && \ - !defined(HEDLEY_TI_VERSION) && \ - !defined(HEDLEY_TI_ARMCL_VERSION) && \ - !defined(HEDLEY_TI_CL430_VERSION) && \ - !defined(HEDLEY_TI_CL2000_VERSION) && \ - !defined(HEDLEY_TI_CL6X_VERSION) && \ - !defined(HEDLEY_TI_CL7X_VERSION) && \ - !defined(HEDLEY_TI_CLPRU_VERSION) && \ - !defined(__COMPCERT__) && \ - !defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION -#endif - -#if defined(HEDLEY_GCC_VERSION_CHECK) -# undef HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(HEDLEY_GCC_VERSION) -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_HAS_ATTRIBUTE) -# undef HEDLEY_HAS_ATTRIBUTE -#endif -#if \ - defined(__has_attribute) && \ - ( \ - (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ - ) -# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if \ - defined(__has_cpp_attribute) && \ - defined(__cplusplus) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) -# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#elif \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION) && \ - 
[... remainder of vendored simde/hedley.h deleted: the __has_builtin / __has_feature / __has_extension / __has_declspec_attribute / __has_warning feature-test wrappers and their GNUC/GCC variants, HEDLEY_PRAGMA, HEDLEY_DIAGNOSTIC_PUSH/POP and the HEDLEY_DIAGNOSTIC_DISABLE_* warning-suppression macros, the cast helpers (HEDLEY_CONST_CAST, HEDLEY_REINTERPRET_CAST, HEDLEY_STATIC_CAST, HEDLEY_CPP_CAST), and the portable attribute shims (HEDLEY_DEPRECATED, HEDLEY_WARN_UNUSED_RESULT, HEDLEY_SENTINEL, HEDLEY_NO_RETURN, HEDLEY_UNREACHABLE / HEDLEY_ASSUME, HEDLEY_NON_NULL, HEDLEY_PRINTF_FORMAT, HEDLEY_PREDICT / HEDLEY_LIKELY / HEDLEY_UNLIKELY, HEDLEY_MALLOC / HEDLEY_PURE / HEDLEY_CONST, HEDLEY_RESTRICT, HEDLEY_INLINE / HEDLEY_ALWAYS_INLINE / HEDLEY_NEVER_INLINE, visibility, nothrow and fallthrough helpers, HEDLEY_STATIC_ASSERT, HEDLEY_MESSAGE / HEDLEY_WARNING, HEDLEY_REQUIRE, HEDLEY_FLAGS, HEDLEY_EMPTY_BASES) plus the deprecated HEDLEY_CLANG_HAS_* aliases, followed by the SIMDE_VERSION 0.8.0 version macros ...]
[... vendored simde/simde-detect-clang.h deleted: CC0-licensed header by Evan Nemerson that infers the upstream Clang version from feature-test macros (__has_warning, __has_attribute, __has_builtin probes going back to Clang 3.6), because the __clang_major__ / __clang_minor__ values reported by vendors such as Apple do not match upstream releases; it defines SIMDE_DETECT_CLANG_VERSION and the SIMDE_DETECT_CLANG_VERSION_CHECK() / SIMDE_DETECT_CLANG_VERSION_NOT() helpers ...]
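For reference, the feature-test strategy the removed header relied on can be sketched in a few lines of preprocessor logic; this is an illustration only, the MYPKG_CLANG_AT_LEAST name is hypothetical, and the two probes are copied from the deleted detection ladder (Clang 11 introduced -Wimplicit-const-int-float-conversion, Clang 4 introduced the diagnose_if attribute).

/* Minimal sketch (not part of the package): infer a lower bound on the Clang
 * version from feature tests instead of trusting __clang_major__. */
#if defined(__clang__)
#  if __has_warning("-Wimplicit-const-int-float-conversion")
#    define MYPKG_CLANG_AT_LEAST 11   /* warning first shipped in Clang 11 */
#  elif __has_attribute(diagnose_if)
#    define MYPKG_CLANG_AT_LEAST 4    /* attribute first shipped in Clang 4 */
#  else
#    define MYPKG_CLANG_AT_LEAST 1    /* some older Clang */
#  endif
#endif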
*/ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -# if defined(__alpha_ev6__) -# define SIMDE_ARCH_ALPHA 6 -# elif defined(__alpha_ev5__) -# define SIMDE_ARCH_ALPHA 5 -# elif defined(__alpha_ev4__) -# define SIMDE_ARCH_ALPHA 4 -# else -# define SIMDE_ARCH_ALPHA 1 -# endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -# define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -# define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -# if !defined(_M_ARM64EC) -# define SIMDE_ARCH_AMD64 1000 -# endif -#endif - -/* ARM - */ -#if defined(__ARM_ARCH) -# if __ARM_ARCH > 100 -# define SIMDE_ARCH_ARM (__ARM_ARCH) -# else -# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) -# endif -#elif defined(_M_ARM) -# if _M_ARM > 100 -# define SIMDE_ARCH_ARM (_M_ARM) -# else -# define SIMDE_ARCH_ARM (_M_ARM * 100) -# endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_ARM 800 -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -# define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) -#else -# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -# define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -# elif defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -# endif -#endif -#if defined(__ARM_FEATURE_SVE) -# define SIMDE_ARCH_ARM_SVE -#endif -#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA -# define SIMDE_ARCH_ARM_FMA -#endif -#if defined(__ARM_FEATURE_CRYPTO) -# define SIMDE_ARCH_ARM_CRYPTO -#endif -#if defined(__ARM_FEATURE_QRDMX) -# define SIMDE_ARCH_ARM_QRDMX -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -# define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -# define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) -# define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -# define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -# define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -# define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -# define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -# define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -# define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -# define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -# define 
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
*/ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. 
*/ -#if \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ -#endif - -/* This warning needs a lot of work. It is triggered if all you do is - * pass the value to memcpy/__builtin_memcpy, or if you initialize a - * member of the union, even if that member takes up the entire union. - * Last tested with clang-10, hopefully things will improve in the - * future; if clang fixes this I'd love to enable it. */ -#if \ - HEDLEY_HAS_WARNING("-Wconditional-uninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ -#endif - -/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which - * will is false. However, SIMDe uses these operations exclusively - * for things like _mm_cmpeq_ps, for which we really do want to check - * for equality (or inequality). - * - * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro - * which just wraps a check in some code do disable this diagnostic I'd - * be happy to accept it. */ -#if \ - HEDLEY_HAS_WARNING("-Wfloat-equal") || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ -#endif - -/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. - * If Hedley can't find an implementation it will preprocess to - * nothing, which means there will be a trailing semi-colon. */ -#if HEDLEY_HAS_WARNING("-Wextra-semi") - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") -#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ -#endif - -/* We do use a few variadic macros, which technically aren't available - * until C99 and C++11, but every compiler I'm aware of has supported - * them for much longer. That said, usage is isolated to the test - * suite and compilers known to support them. */ -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) - #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#endif - -/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro - * before we can access certain SIMD intrinsics, but this diagnostic - * warns about it being a reserved name. It is a reserved name, but - * it's reserved for the compiler and we are using it to convey - * information to the compiler. - * - * This is also used when enabling native aliases since we don't get to - * choose the macro names. 
*/ -#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#endif - -/* Similar to above; types like simde__m128i are reserved due to the - * double underscore, but we didn't choose them, Intel did. */ -#if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ -#endif - -/* clang 3.8 warns about the packed attribute being unnecessary when - * used in the _mm_loadu_* functions. That *may* be true for version - * 3.8, but for later versions it is crucial in order to make unaligned - * access safe. */ -#if HEDLEY_HAS_WARNING("-Wpacked") - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ -#endif - -/* Triggered when assigning a float to a double implicitly. We use - * explicit casts in SIMDe, this is only used in the test suite. */ -#if HEDLEY_HAS_WARNING("-Wdouble-promotion") - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -/* Several compilers treat conformant array parameters as VLAs. We - * test to make sure we're in C mode (C++ doesn't support CAPs), and - * that the version of the standard supports CAPs. We also reject - * some buggy compilers like MSVC (the logic is in Hedley if you want - * to take a look), but with certain warnings enabled some compilers - * still like to emit a diagnostic. */ -#if HEDLEY_HAS_WARNING("-Wvla") - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ -#endif - -/* If you add an unused attribute to a function and don't use it, clang - * may emit this. 
*/ -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpass-failed") - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpadded") - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ -#endif - -#if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ -#endif - -#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ -#endif - -/* clang will emit this warning when we use C99 extensions whan not in - * C99 mode, even though it does support this. In such cases we check - * the compiler and version first, so we know it's not a problem. */ -#if HEDLEY_HAS_WARNING("-Wc99-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -#endif - -/* Similar problm as above; we rely on some basic C99 support, but clang - * has started warning obut this even in C17 mode with -Weverything. */ -#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ -#endif - -/* https://github.com/simd-everywhere/simde/issues/277 */ -#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ -#endif - -/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS - * to silence, but you have to do that before including anything and - * that would require reordering includes. */ -#if defined(_MSC_VER) - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ -#endif - -/* Some compilers, such as clang, may use `long long` for 64-bit - * integers, but `long long` triggers a diagnostic with - * -Wc++98-compat-pedantic which says 'long long' is incompatible with - * C++98. 
*/ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ - _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ -#endif - -/* Some problem as above */ -#if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ -#endif - -/* emscripten emits this whenever stdin/stdout/stderr is used in a - * macro. */ -#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ -#endif - -/* Clang uses C11 generic selections to implement some AltiVec - * functions, which triggers this diagnostic when not compiling - * in C11 mode */ -#if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ -#endif - -/* Clang sometimes triggers this warning in macros in the AltiVec and - * NEON headers, or due to missing functions. */ -#if HEDLEY_HAS_WARNING("-Wvector-conversion") - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") - /* For NEON, the situation with -Wvector-conversion in clang < 10 is - * bad enough that we just disable the warning altogether. On x86, - * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ - #if \ - (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ - SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ -#endif - -/* Prior to 5.0, clang didn't support disabling diagnostics in - * statement exprs. As a result, some macros we use don't - * properly silence warnings. 
*/ -#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ -#endif - -/* SLEEF triggers this a *lot* in their headers */ -#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#endif - -/* GCC emits this under some circumstances when using __int128 */ -#if HEDLEY_GCC_VERSION_CHECK(4,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -#endif - -/* MSVC doesn't like (__assume(0), code) and will warn about code being - * unreachable, but we want it there because not all compilers - * understand the unreachable macro and will complain if it is missing. - * I'm planning on adding a new macro to Hedley to handle this a bit - * more elegantly, but until then... */ -#if defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) -#elif defined(__clang__) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ -#endif - -/* This is a false positive from GCC in a few places. */ -#if HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif - -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#else - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ -#endif - -/* Some native functions on E2K with instruction set < v6 are declared - * as deprecated due to inefficiency. Still they are more efficient - * than SIMDe implementation. So we're using them, and switching off - * these deprecation warnings. 
*/ -#if defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") -#else -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS -#endif - -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ - HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ - SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ - SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ - SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ - SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ - SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ - SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ - -#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ -/* :: End simde/simde-diagnostic.h :: */ - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SVML) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) - #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) - #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BITALG) - #define SIMDE_X86_AVX512BITALG_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI) - #define SIMDE_X86_AVX512VBMI_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI2) - #define SIMDE_X86_AVX512VBMI2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VNNI) - #define SIMDE_X86_AVX512VNNI_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) - #define SIMDE_X86_AVX5124VNNIW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512CD) - #define SIMDE_X86_AVX512CD_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512DQ) - #define SIMDE_X86_AVX512DQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VL) - #define SIMDE_X86_AVX512VL_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BW) - #define SIMDE_X86_AVX512BW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && !defined(SIMDE_X86_AVX512FP16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512FP16) - #define SIMDE_X86_AVX512FP16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BF16) - #define SIMDE_X86_AVX512BF16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512F) - #define SIMDE_X86_AVX512F_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_NATIVE -#endif - -#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_FMA) - #define SIMDE_X86_FMA_NATIVE - #endif -#endif -#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX2) - #define SIMDE_X86_AVX2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX) - #define SIMDE_X86_AVX_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_XOP) - #define SIMDE_X86_XOP_NATIVE - #endif -#endif -#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_2) - #define SIMDE_X86_SSE4_2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_1) - #define SIMDE_X86_SSE4_1_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSSE3) - #define SIMDE_X86_SSSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE3) - #define SIMDE_X86_SSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_AES_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AES) - #define SIMDE_X86_AES_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE2) - #define SIMDE_X86_SSE2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE) - #define SIMDE_X86_SSE_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_MMX) - #define SIMDE_X86_MMX_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_GFNI) - #define SIMDE_X86_GFNI_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_PCLMUL) - #define SIMDE_X86_PCLMUL_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) - #define SIMDE_X86_VPCLMULQDQ_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_F16C) - #define SIMDE_X86_F16C_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86) && 
(defined(__INTEL_COMPILER) || (HEDLEY_MSVC_VERSION_CHECK(14, 20, 0) && !defined(__clang__))) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(push) - #pragma warning(disable:4799) -#endif - -#if \ - defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) || defined(SIMDE_X86_SVML_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_1_NATIVE) - #include -#elif defined(SIMDE_X86_SSSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE_NATIVE) - #include -#elif defined(SIMDE_X86_MMX_NATIVE) - #include -#endif - -#if defined(SIMDE_X86_XOP_NATIVE) - #if defined(_MSC_VER) - #include - #else - #include - #endif -#endif - -#if defined(SIMDE_X86_AES_NATIVE) - #include -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(pop) -#endif - -#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) - #define SIMDE_ARM_NEON_A64V8_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) - #define SIMDE_ARM_NEON_A32V8_NATIVE - #endif -#endif -#if defined(__ARM_ACLE) - #include -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) - #define SIMDE_ARM_NEON_A32V7_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include - #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - #include - #endif -#endif - -#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_SVE) - #define SIMDE_ARM_SVE_NATIVE - #include - #endif -#endif - -#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_SIMD128) - #define SIMDE_WASM_SIMD128_NATIVE - #endif -#endif - -#if !defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) && !defined(SIMDE_WASM_RELAXED_SIMD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_RELAXED_SIMD) - #define SIMDE_WASM_RELAXED_SIMD_NATIVE - #endif -#endif -#if defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) - #include -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) - #define SIMDE_POWER_ALTIVEC_P9_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) - #define 
SIMDE_POWER_ALTIVEC_P7_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) - #define SIMDE_POWER_ALTIVEC_P7_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_15_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_14_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_13_NATIVE - #endif -#endif - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - /* AltiVec conflicts with lots of stuff. The bool keyword conflicts - * with the bool keyword in C++ and the bool macro in C99+ (defined - * in stdbool.h). The vector keyword conflicts with std::vector in - * C++ if you are `using std;`. - * - * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` - * instead, but altivec.h will unconditionally define - * `vector`/`bool`/`pixel` so we need to work around that. - * - * Unfortunately this means that if your code uses AltiVec directly - * it may break. If this is the case you'll want to define - * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even - * better, port your code to use the double-underscore versions. */ - #if defined(bool) - #undef bool - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #include - - #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #if defined(vector) - #undef vector - #endif - #if defined(pixel) - #undef pixel - #endif - #if defined(bool) - #undef bool - #endif - #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #include - #endif - - /* Use these intsead of vector/pixel/bool in SIMDe. 
*/ - #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T - #define SIMDE_POWER_ALTIVEC_PIXEL __pixel - #define SIMDE_POWER_ALTIVEC_BOOL __bool - - /* Re-define bool if we're using stdbool.h */ - #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #define bool _Bool - #endif -#endif - -#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) - #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_MSA) - #define SIMDE_MIPS_MSA_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_MSA_NATIVE) - #include -#endif - -/* This is used to determine whether or not to fall back on a vector - * function in an earlier ISA extensions, as well as whether - * we expected any attempts at vectorization to be fruitful or if we - * expect to always be running serial code. - * - * Note that, for some architectures (okay, *one* architecture) there - * can be a split where some types are supported for one vector length - * but others only for a shorter length. Therefore, it is possible to - * provide separate values for float/int/double types. */ - -#if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (512) - #elif defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (256) - #elif defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || \ - defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ - defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (128) - #elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) - #endif - - #if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE - #else - #define SIMDE_NATURAL_VECTOR_SIZE (0) - #endif - #endif - - #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif -#endif - -#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) 
((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) - -/* Native aliases */ -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_FMA_NATIVE) - #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VL_NATIVE) - #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) - #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) - #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BW_NATIVE) - #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) - #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) - #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BF16_NATIVE) - #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) - #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) - #define SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512DQ_NATIVE) - #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512CD_NATIVE) - #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512FP16_NATIVE) - #define SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_GFNI_NATIVE) - #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_PCLMUL_NATIVE) - #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) - #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_F16C_NATIVE) - #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AES_NATIVE) - #define 
SIMDE_X86_AES_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SVML_NATIVE) - #define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_SVE_NATIVE) - #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_WASM_SIMD128_NATIVE) - #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES - #endif -#endif - -/* Are floating point values stored using IEEE 754? Knowing - * this during preprocessing is a bit tricky, mostly because what - * we're curious about is how values are stored and not whether the - * implementation is fully conformant in terms of rounding, NaN - * handling, etc. - * - * For example, if you use -ffast-math or -Ofast on - * GCC or clang, IEEE 754 isn't strictly followed, therefore IEEE 754 - * support is not advertised (by defining __STDC_IEC_559__). - * - * However, what we care about is whether it is safe to assume that - * floating point values are stored in IEEE 754 format, in which case - * we can provide faster implementations of some functions. - * - * Luckily every vaguely modern architecture I'm aware of uses IEEE 754, - * so we just assume IEEE 754 for now. There is a test which verifies - * this; if that test fails somewhere please let us know and we'll add - * an exception for that platform. Meanwhile, you can define - * SIMDE_NO_IEEE754_STORAGE. */ -#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) - #define SIMDE_IEEE754_STORAGE -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_FP16) - #define SIMDE_ARM_NEON_FP16 -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_BF16) - #define SIMDE_ARM_NEON_BF16 -#endif - -#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LASX) - #define SIMDE_LOONGARCH_LASX_NATIVE - #endif -#endif - -#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LSX) - #define SIMDE_LOONGARCH_LSX_NATIVE - #endif -#endif - -#if defined(SIMDE_LOONGARCH_LASX_NATIVE) - #include -#endif -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - #include -#endif - -#endif /* !defined(SIMDE_FEATURES_H) */ -/* :: End simde/simde-features.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-math.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -/* Attempt to find math functions. Functions may be in math.h, - * cmath, compiler built-ins/intrinsics, or platform/architecture - * specific headers. In some cases, especially those not built in to - * libm, we may need to define our own implementations. */ - -#if !defined(SIMDE_MATH_H) -#define SIMDE_MATH_H 1 - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#include -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -/* SLEEF support - * https://sleef.org/ - * - * If you include sleef.h prior to including SIMDe, SIMDe will use - * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to - * including SIMDe to force the issue. - * - * Note that SLEEF does require linking to libsleef. - * - * By default, SIMDe will use the 1 ULP functions, but if you use - * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is - * only the case for the simde_math_* functions; for code in other - * SIMDe headers which calls SLEEF directly we may use functions with - * greater error if the API we're implementing is less precise (for - * example, SVML guarantees 4 ULP, so we will generally use the 3.5 - * ULP functions from SLEEF). */ -#if !defined(SIMDE_MATH_SLEEF_DISABLE) - #if defined(__SLEEF_H__) - #define SIMDE_MATH_SLEEF_ENABLE - #endif -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ - #include - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) - #if defined(SLEEF_VERSION_MAJOR) - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #endif -#else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(__has_builtin) - #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) - #define SIMDE_MATH_BUILTIN_LIBM(func) (1) -#else - #define SIMDE_MATH_BUILTIN_LIBM(func) (0) -#endif - -#if defined(HUGE_VAL) - /* Looks like math.h or cmath has already been included. */ - - /* The math.h from libc++ (yes, the C header from the C++ standard - * library) will define an isnan function, but not an isnan macro - * like the C standard requires. So we detect the header guard - * macro libc++ uses.
*/ - #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) - #define SIMDE_MATH_HAVE_MATH_H - #elif defined(__cplusplus) - #define SIMDE_MATH_HAVE_CMATH - #endif -#elif defined(__has_include) - #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() - #define SIMDE_MATH_HAVE_CMATH - #include - #elif __has_include() - #define SIMDE_MATH_HAVE_MATH_H - #include - #elif !defined(SIMDE_MATH_NO_LIBM) - #define SIMDE_MATH_NO_LIBM - #endif -#elif !defined(SIMDE_MATH_NO_LIBM) - #if defined(__cplusplus) && (__cplusplus >= 201103L) - #define SIMDE_MATH_HAVE_CMATH - HEDLEY_DIAGNOSTIC_PUSH - #if defined(HEDLEY_MSVC_VERSION) - /* VS 14 emits this diagnostic about noexcept being used on a - * function, which we can't do anything about. */ - #pragma warning(disable:4996) - #endif - #include - HEDLEY_DIAGNOSTIC_POP - #else - #define SIMDE_MATH_HAVE_MATH_H - #include - #endif -#endif - -#if !defined(SIMDE_MATH_INFINITY) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_INFINITY (__builtin_inf()) - #elif defined(INFINITY) - #define SIMDE_MATH_INFINITY INFINITY - #endif -#endif - -#if !defined(SIMDE_INFINITYF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inff) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_INFINITYF (__builtin_inff()) - #elif defined(INFINITYF) - #define SIMDE_MATH_INFINITYF INFINITYF - #elif defined(SIMDE_MATH_INFINITY) - #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) - #endif -#endif - -#if !defined(SIMDE_MATH_NAN) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nan) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_NAN (__builtin_nan("")) - #elif defined(NAN) - #define SIMDE_MATH_NAN NAN - #endif -#endif - -#if !defined(SIMDE_NANF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_NANF (__builtin_nanf("")) - #elif defined(NANF) - #define SIMDE_MATH_NANF NANF - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) - #endif -#endif - -#if !defined(SIMDE_MATH_PI) - #if defined(M_PI) - #define SIMDE_MATH_PI M_PI - #else - #define SIMDE_MATH_PI 3.14159265358979323846 - #endif -#endif - -#if !defined(SIMDE_MATH_PIF) - #if defined(M_PI) - #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) - #else - #define SIMDE_MATH_PIF 3.14159265358979323846f - #endif -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180) - #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180F) - #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f -#endif - -#if !defined(SIMDE_MATH_180_OVER_PI) - #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 -#endif - -#if !defined(SIMDE_MATH_180_OVER_PIF) - #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f -#endif - -#if 
!defined(SIMDE_MATH_FLT_MIN) - #if defined(__FLT_MIN__) - #define SIMDE_MATH_FLT_MIN __FLT_MIN__ - #else - #if !defined(FLT_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MIN FLT_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_FLT_MAX) - #if defined(__FLT_MAX__) - #define SIMDE_MATH_FLT_MAX __FLT_MAX__ - #else - #if !defined(FLT_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MAX FLT_MAX - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MIN) - #if defined(__DBL_MIN__) - #define SIMDE_MATH_DBL_MIN __DBL_MIN__ - #else - #if !defined(DBL_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MIN DBL_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MAX) - #if defined(__DBL_MAX__) - #define SIMDE_MATH_DBL_MAX __DBL_MAX__ - #else - #if !defined(DBL_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MAX DBL_MAX - #endif -#endif - -/*** Classification macros from C99 ***/ - -#if !defined(simde_math_isinf) - #if SIMDE_MATH_BUILTIN_LIBM(isinf) - #define simde_math_isinf(v) __builtin_isinf(v) - #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isinf(v) isinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinf(v) std::isinf(v) - #endif -#endif - -#if !defined(simde_math_isinff) - #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define simde_math_isinff(v) __builtin_isinff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinff(v) std::isinf(v) - #elif defined(simde_math_isinf) - #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnan) - #if SIMDE_MATH_BUILTIN_LIBM(isnan) - #define simde_math_isnan(v) __builtin_isnan(v) - #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnan(v) isnan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnan(v) std::isnan(v) - #endif -#endif - -#if !defined(simde_math_isnanf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ - #define simde_math_isnanf(v) __builtin_isnanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnanf(v) std::isnan(v) - #elif defined(simde_math_isnan) - #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnormal) - #if SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormal(v) __builtin_isnormal(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormal(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormal(v) std::isnormal(v) - #endif -#endif - -#if !defined(simde_math_isnormalf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) - #define simde_math_isnormalf(v) __builtin_isnormalf(v) - #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormalf(v) __builtin_isnormal(v) - #elif defined(isnormalf) - #define simde_math_isnormalf(v) isnormalf(v) - #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormalf(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormalf(v) std::isnormal(v) - #elif defined(simde_math_isnormal) - #define simde_math_isnormalf(v) 
simde_math_isnormal(v) - #endif -#endif - -#if !defined(simde_math_issubnormalf) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) - #endif -#endif - -#if !defined(simde_math_issubnormal) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormal(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) - #endif -#endif - -#if defined(FP_NAN) - #define SIMDE_MATH_FP_NAN FP_NAN -#else - #define SIMDE_MATH_FP_NAN 0 -#endif -#if defined(FP_INFINITE) - #define SIMDE_MATH_FP_INFINITE FP_INFINITE -#else - #define SIMDE_MATH_FP_INFINITE 1 -#endif -#if defined(FP_ZERO) - #define SIMDE_MATH_FP_ZERO FP_ZERO -#else - #define SIMDE_MATH_FP_ZERO 2 -#endif -#if defined(FP_SUBNORMAL) - #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL -#else - #define SIMDE_MATH_FP_SUBNORMAL 3 -#endif -#if defined(FP_NORMAL) - #define SIMDE_MATH_FP_NORMAL FP_NORMAL -#else - #define SIMDE_MATH_FP_NORMAL 4 -#endif - -static HEDLEY_INLINE -int -simde_math_fpclassifyf(float v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0f) ? SIMDE_MATH_FP_ZERO : - simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -static HEDLEY_INLINE -int -simde_math_fpclassify(double v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0) ? SIMDE_MATH_FP_ZERO : - simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -#define SIMDE_MATH_FP_QNAN 0x01 -#define SIMDE_MATH_FP_PZERO 0x02 -#define SIMDE_MATH_FP_NZERO 0x04 -#define SIMDE_MATH_FP_PINF 0x08 -#define SIMDE_MATH_FP_NINF 0x10 -#define SIMDE_MATH_FP_DENORMAL 0x20 -#define SIMDE_MATH_FP_NEGATIVE 0x40 -#define SIMDE_MATH_FP_SNAN 0x80 - -static HEDLEY_INLINE -uint8_t -simde_math_fpclassf(float v, const int imm8) { - union { - float f; - uint32_t u; - } fu; - fu.f = v; - uint32_t bits = fu.u; - uint8_t NegNum = (bits >> 31) & 1; - uint32_t const ExpMask = 0x3F800000; // [30:23] - uint32_t const MantMask = 0x007FFFFF; // [22:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 22) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -static HEDLEY_INLINE -uint8_t -simde_math_fpclass(double v, const int imm8) { - union { - double d; - uint64_t u; - } du; - du.d = v; - uint64_t bits = du.u; - uint8_t NegNum = (bits >> 63) & 1; - uint64_t const ExpMask = 0x3FF0000000000000; // [62:52] - uint64_t const MantMask = 0x000FFFFFFFFFFFFF; // [51:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 51) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -/*** Manipulation functions ***/ - -#if !defined(simde_math_nextafter) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafter(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafter(x, y) 
nextafter(x, y) - #endif -#endif - -#if !defined(simde_math_nextafterf) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafterf(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafterf(x, y) nextafterf(x, y) - #endif -#endif - -/*** Functions from C99 ***/ - -#if !defined(simde_math_abs) - #if SIMDE_MATH_BUILTIN_LIBM(abs) - #define simde_math_abs(v) __builtin_abs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_abs(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_abs(v) abs(v) - #endif -#endif - -#if !defined(simde_math_labs) - #if SIMDE_MATH_BUILTIN_LIBM(labs) - #define simde_math_labs(v) __builtin_labs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_labs(v) std::labs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_labs(v) labs(v) - #endif -#endif - -#if !defined(simde_math_llabs) - #if SIMDE_MATH_BUILTIN_LIBM(llabs) - #define simde_math_llabs(v) __builtin_llabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_llabs(v) std::llabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_llabs(v) llabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_acos) - #if SIMDE_MATH_BUILTIN_LIBM(acos) - #define simde_math_acos(v) __builtin_acos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acos(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acos(v) acos(v) - #endif -#endif - -#if !defined(simde_math_acosf) - #if SIMDE_MATH_BUILTIN_LIBM(acosf) - #define simde_math_acosf(v) __builtin_acosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosf(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosf(v) acosf(v) - #endif -#endif - -#if !defined(simde_math_acosh) - #if SIMDE_MATH_BUILTIN_LIBM(acosh) - #define simde_math_acosh(v) __builtin_acosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosh(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosh(v) acosh(v) - #endif -#endif - -#if !defined(simde_math_acoshf) - #if SIMDE_MATH_BUILTIN_LIBM(acoshf) - #define simde_math_acoshf(v) __builtin_acoshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acoshf(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acoshf(v) acoshf(v) - #endif -#endif - -#if !defined(simde_math_asin) - #if SIMDE_MATH_BUILTIN_LIBM(asin) - #define simde_math_asin(v) __builtin_asin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asin(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asin(v) asin(v) - #endif -#endif - -#if !defined(simde_math_asinf) - #if SIMDE_MATH_BUILTIN_LIBM(asinf) - #define simde_math_asinf(v) __builtin_asinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinf(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinf(v) asinf(v) - #endif -#endif - -#if 
!defined(simde_math_asinh) - #if SIMDE_MATH_BUILTIN_LIBM(asinh) - #define simde_math_asinh(v) __builtin_asinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinh(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinh(v) asinh(v) - #endif -#endif - -#if !defined(simde_math_asinhf) - #if SIMDE_MATH_BUILTIN_LIBM(asinhf) - #define simde_math_asinhf(v) __builtin_asinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinhf(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinhf(v) asinhf(v) - #endif -#endif - -#if !defined(simde_math_atan) - #if SIMDE_MATH_BUILTIN_LIBM(atan) - #define simde_math_atan(v) __builtin_atan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan(v) atan(v) - #endif -#endif - -#if !defined(simde_math_atan2) - #if SIMDE_MATH_BUILTIN_LIBM(atan2) - #define simde_math_atan2(y, x) __builtin_atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2(y, x) atan2(y, x) - #endif -#endif - -#if !defined(simde_math_atan2f) - #if SIMDE_MATH_BUILTIN_LIBM(atan2f) - #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2f(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2f(y, x) atan2f(y, x) - #endif -#endif - -#if !defined(simde_math_atanf) - #if SIMDE_MATH_BUILTIN_LIBM(atanf) - #define simde_math_atanf(v) __builtin_atanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanf(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanf(v) atanf(v) - #endif -#endif - -#if !defined(simde_math_atanh) - #if SIMDE_MATH_BUILTIN_LIBM(atanh) - #define simde_math_atanh(v) __builtin_atanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanh(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanh(v) atanh(v) - #endif -#endif - -#if !defined(simde_math_atanhf) - #if SIMDE_MATH_BUILTIN_LIBM(atanhf) - #define simde_math_atanhf(v) __builtin_atanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanhf(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanhf(v) atanhf(v) - #endif -#endif - -#if !defined(simde_math_cbrt) - #if SIMDE_MATH_BUILTIN_LIBM(cbrt) - #define simde_math_cbrt(v) __builtin_cbrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrt(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrt(v) cbrt(v) - #endif -#endif - -#if !defined(simde_math_cbrtf) - #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) - #define simde_math_cbrtf(v) __builtin_cbrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrtf(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrtf(v) cbrtf(v) - #endif -#endif - -#if !defined(simde_math_ceil) - #if SIMDE_MATH_BUILTIN_LIBM(ceil) - #define simde_math_ceil(v) __builtin_ceil(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceil(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_ceil(v) ceil(v) - #endif -#endif - -#if !defined(simde_math_ceilf) - #if SIMDE_MATH_BUILTIN_LIBM(ceilf) - #define simde_math_ceilf(v) __builtin_ceilf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceilf(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) 
- #define simde_math_ceilf(v) ceilf(v) - #endif -#endif - -#if !defined(simde_math_copysign) - #if SIMDE_MATH_BUILTIN_LIBM(copysign) - #define simde_math_copysign(x, y) __builtin_copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysign(x, y) std::copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysign(x, y) copysign(x, y) - #endif -#endif - -#if !defined(simde_math_copysignf) - #if SIMDE_MATH_BUILTIN_LIBM(copysignf) - #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysignf(x, y) std::copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysignf(x, y) copysignf(x, y) - #endif -#endif - -#if !defined(simde_math_signbit) - #if SIMDE_MATH_BUILTIN_LIBM(signbit) - #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - #define simde_math_signbit(x) __builtin_signbit(x) - #else - #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) - #endif - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_signbit(x) std::signbit(x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_signbit(x) signbit(x) - #endif -#endif - -#if !defined(simde_math_cos) - #if SIMDE_MATH_BUILTIN_LIBM(cos) - #define simde_math_cos(v) __builtin_cos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cos(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cos(v) cos(v) - #endif -#endif - -#if !defined(simde_math_cosf) - #if defined(SIMDE_MATH_SLEEF_ENABLE) - #if SIMDE_ACCURACY_PREFERENCE < 1 - #define simde_math_cosf(v) Sleef_cosf_u35(v) - #else - #define simde_math_cosf(v) Sleef_cosf_u10(v) - #endif - #elif SIMDE_MATH_BUILTIN_LIBM(cosf) - #define simde_math_cosf(v) __builtin_cosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosf(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosf(v) cosf(v) - #endif -#endif - -#if !defined(simde_math_cosh) - #if SIMDE_MATH_BUILTIN_LIBM(cosh) - #define simde_math_cosh(v) __builtin_cosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosh(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosh(v) cosh(v) - #endif -#endif - -#if !defined(simde_math_coshf) - #if SIMDE_MATH_BUILTIN_LIBM(coshf) - #define simde_math_coshf(v) __builtin_coshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_coshf(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_coshf(v) coshf(v) - #endif -#endif - -#if !defined(simde_math_erf) - #if SIMDE_MATH_BUILTIN_LIBM(erf) - #define simde_math_erf(v) __builtin_erf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erf(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erf(v) erf(v) - #endif -#endif - -#if !defined(simde_math_erff) - #if SIMDE_MATH_BUILTIN_LIBM(erff) - #define simde_math_erff(v) __builtin_erff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erff(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erff(v) erff(v) - #endif -#endif - -#if !defined(simde_math_erfc) - #if SIMDE_MATH_BUILTIN_LIBM(erfc) - #define simde_math_erfc(v) __builtin_erfc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfc(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfc(v) erfc(v) - #endif -#endif - -#if !defined(simde_math_erfcf) - #if SIMDE_MATH_BUILTIN_LIBM(erfcf) - #define simde_math_erfcf(v) 
__builtin_erfcf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfcf(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfcf(v) erfcf(v) - #endif -#endif - -#if !defined(simde_math_exp) - #if SIMDE_MATH_BUILTIN_LIBM(exp) - #define simde_math_exp(v) __builtin_exp(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp(v) exp(v) - #endif -#endif - -#if !defined(simde_math_expf) - #if SIMDE_MATH_BUILTIN_LIBM(expf) - #define simde_math_expf(v) __builtin_expf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expf(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expf(v) expf(v) - #endif -#endif - -#if !defined(simde_math_expm1) - #if SIMDE_MATH_BUILTIN_LIBM(expm1) - #define simde_math_expm1(v) __builtin_expm1(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1(v) expm1(v) - #endif -#endif - -#if !defined(simde_math_expm1f) - #if SIMDE_MATH_BUILTIN_LIBM(expm1f) - #define simde_math_expm1f(v) __builtin_expm1f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1f(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1f(v) expm1f(v) - #endif -#endif - -#if !defined(simde_math_exp2) - #if SIMDE_MATH_BUILTIN_LIBM(exp2) - #define simde_math_exp2(v) __builtin_exp2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2(v) exp2(v) - #endif -#endif - -#if !defined(simde_math_exp2f) - #if SIMDE_MATH_BUILTIN_LIBM(exp2f) - #define simde_math_exp2f(v) __builtin_exp2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2f(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2f(v) exp2f(v) - #endif -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10(v) __builtin_exp10(v) -#else -# define simde_math_exp10(v) pow(10.0, (v)) -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10f(v) __builtin_exp10f(v) -#else -# define simde_math_exp10f(v) powf(10.0f, (v)) -#endif - -#if !defined(simde_math_fabs) - #if SIMDE_MATH_BUILTIN_LIBM(fabs) - #define simde_math_fabs(v) __builtin_fabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabs(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabs(v) fabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_floor) - #if SIMDE_MATH_BUILTIN_LIBM(floor) - #define simde_math_floor(v) __builtin_floor(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floor(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floor(v) floor(v) - #endif -#endif - -#if !defined(simde_math_floorf) - #if SIMDE_MATH_BUILTIN_LIBM(floorf) - #define simde_math_floorf(v) __builtin_floorf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floorf(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floorf(v) floorf(v) - #endif -#endif - -#if 
!defined(simde_math_fma) - #if SIMDE_MATH_BUILTIN_LIBM(fma) - #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fma(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fma(x, y, z) fma(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmaf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaf) - #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaf(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaf(x, y, z) fmaf(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmax) - #if SIMDE_MATH_BUILTIN_LIBM(fmax) - #define simde_math_fmax(x, y) __builtin_fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmax(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmax(x, y) fmax(x, y) - #endif -#endif - -#if !defined(simde_math_fmaxf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) - #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaxf(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaxf(x, y) fmaxf(x, y) - #endif -#endif - -#if !defined(simde_math_hypot) - #if SIMDE_MATH_BUILTIN_LIBM(hypot) - #define simde_math_hypot(y, x) __builtin_hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypot(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypot(y, x) hypot(y, x) - #endif -#endif - -#if !defined(simde_math_hypotf) - #if SIMDE_MATH_BUILTIN_LIBM(hypotf) - #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypotf(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypotf(y, x) hypotf(y, x) - #endif -#endif - -#if !defined(simde_math_log) - #if SIMDE_MATH_BUILTIN_LIBM(log) - #define simde_math_log(v) __builtin_log(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log(v) log(v) - #endif -#endif - -#if !defined(simde_math_logf) - #if SIMDE_MATH_BUILTIN_LIBM(logf) - #define simde_math_logf(v) __builtin_logf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logf(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logf(v) logf(v) - #endif -#endif - -#if !defined(simde_math_logb) - #if SIMDE_MATH_BUILTIN_LIBM(logb) - #define simde_math_logb(v) __builtin_logb(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logb(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logb(v) logb(v) - #endif -#endif - -#if !defined(simde_math_logbf) - #if SIMDE_MATH_BUILTIN_LIBM(logbf) - #define simde_math_logbf(v) __builtin_logbf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logbf(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logbf(v) logbf(v) - #endif -#endif - -#if !defined(simde_math_log1p) - #if SIMDE_MATH_BUILTIN_LIBM(log1p) - #define simde_math_log1p(v) __builtin_log1p(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log1p(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1p(v) log1p(v) - #endif -#endif - -#if !defined(simde_math_log1pf) - #if SIMDE_MATH_BUILTIN_LIBM(log1pf) - #define simde_math_log1pf(v) __builtin_log1pf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define 
simde_math_log1pf(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1pf(v) log1pf(v) - #endif -#endif - -#if !defined(simde_math_log2) - #if SIMDE_MATH_BUILTIN_LIBM(log2) - #define simde_math_log2(v) __builtin_log2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2(v) log2(v) - #endif -#endif - -#if !defined(simde_math_log2f) - #if SIMDE_MATH_BUILTIN_LIBM(log2f) - #define simde_math_log2f(v) __builtin_log2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2f(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2f(v) log2f(v) - #endif -#endif - -#if !defined(simde_math_log10) - #if SIMDE_MATH_BUILTIN_LIBM(log10) - #define simde_math_log10(v) __builtin_log10(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10(v) log10(v) - #endif -#endif - -#if !defined(simde_math_log10f) - #if SIMDE_MATH_BUILTIN_LIBM(log10f) - #define simde_math_log10f(v) __builtin_log10f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10f(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10f(v) log10f(v) - #endif -#endif - -#if !defined(simde_math_modf) - #if SIMDE_MATH_BUILTIN_LIBM(modf) - #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modf(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modf(x, iptr) modf(x, iptr) - #endif -#endif - -#if !defined(simde_math_modff) - #if SIMDE_MATH_BUILTIN_LIBM(modff) - #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modff(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modff(x, iptr) modff(x, iptr) - #endif -#endif - -#if !defined(simde_math_nearbyint) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) - #define simde_math_nearbyint(v) __builtin_nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyint(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyint(v) nearbyint(v) - #endif -#endif - -#if !defined(simde_math_nearbyintf) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) - #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyintf(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyintf(v) nearbyintf(v) - #endif -#endif - -#if !defined(simde_math_pow) - #if SIMDE_MATH_BUILTIN_LIBM(pow) - #define simde_math_pow(y, x) __builtin_pow(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_pow(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_pow(y, x) pow(y, x) - #endif -#endif - -#if !defined(simde_math_powf) - #if SIMDE_MATH_BUILTIN_LIBM(powf) - #define simde_math_powf(y, x) __builtin_powf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_powf(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_powf(y, x) powf(y, x) - #endif -#endif - -#if !defined(simde_math_rint) - #if SIMDE_MATH_BUILTIN_LIBM(rint) - #define simde_math_rint(v) __builtin_rint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rint(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rint(v) rint(v) - #endif 
-#endif - -#if !defined(simde_math_rintf) - #if SIMDE_MATH_BUILTIN_LIBM(rintf) - #define simde_math_rintf(v) __builtin_rintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rintf(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rintf(v) rintf(v) - #endif -#endif - -#if !defined(simde_math_round) - #if SIMDE_MATH_BUILTIN_LIBM(round) - #define simde_math_round(v) __builtin_round(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_round(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_round(v) round(v) - #endif -#endif - -#if !defined(simde_math_roundf) - #if SIMDE_MATH_BUILTIN_LIBM(roundf) - #define simde_math_roundf(v) __builtin_roundf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_roundf(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_roundf(v) roundf(v) - #endif -#endif - -#if !defined(simde_math_roundeven) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundeven(v) __builtin_roundeven(v) - #elif defined(simde_math_round) && defined(simde_math_fabs) - static HEDLEY_INLINE - double - simde_math_roundeven(double v) { - double rounded = simde_math_round(v); - double diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundeven simde_math_roundeven - #endif -#endif - -#if !defined(simde_math_roundevenf) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundevenf(v) __builtin_roundevenf(v) - #elif defined(simde_math_roundf) && defined(simde_math_fabsf) - static HEDLEY_INLINE - float - simde_math_roundevenf(float v) { - float rounded = simde_math_roundf(v); - float diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundevenf simde_math_roundevenf - #endif -#endif - -#if !defined(simde_math_sin) - #if SIMDE_MATH_BUILTIN_LIBM(sin) - #define simde_math_sin(v) __builtin_sin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sin(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sin(v) sin(v) - #endif -#endif - -#if !defined(simde_math_sinf) - #if SIMDE_MATH_BUILTIN_LIBM(sinf) - #define simde_math_sinf(v) __builtin_sinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinf(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinf(v) sinf(v) - #endif -#endif - -#if !defined(simde_math_sinh) - #if SIMDE_MATH_BUILTIN_LIBM(sinh) - #define simde_math_sinh(v) __builtin_sinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinh(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinh(v) sinh(v) - #endif -#endif - -#if !defined(simde_math_sinhf) - #if SIMDE_MATH_BUILTIN_LIBM(sinhf) - #define simde_math_sinhf(v) __builtin_sinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinhf(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinhf(v) sinhf(v) - #endif -#endif - -#if !defined(simde_math_sqrt) - #if SIMDE_MATH_BUILTIN_LIBM(sqrt) - #define simde_math_sqrt(v) __builtin_sqrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrt(v) 
std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrt(v) sqrt(v) - #endif -#endif - -#if !defined(simde_math_sqrtf) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) - #define simde_math_sqrtf(v) __builtin_sqrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtf(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtf(v) sqrtf(v) - #endif -#endif - -#if !defined(simde_math_sqrtl) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtl) - #define simde_math_sqrtl(v) __builtin_sqrtl(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtl(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtl(v) sqrtl(v) - #endif -#endif - -#if !defined(simde_math_tan) - #if SIMDE_MATH_BUILTIN_LIBM(tan) - #define simde_math_tan(v) __builtin_tan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tan(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tan(v) tan(v) - #endif -#endif - -#if !defined(simde_math_tanf) - #if SIMDE_MATH_BUILTIN_LIBM(tanf) - #define simde_math_tanf(v) __builtin_tanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanf(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanf(v) tanf(v) - #endif -#endif - -#if !defined(simde_math_tanh) - #if SIMDE_MATH_BUILTIN_LIBM(tanh) - #define simde_math_tanh(v) __builtin_tanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanh(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanh(v) tanh(v) - #endif -#endif - -#if !defined(simde_math_tanhf) - #if SIMDE_MATH_BUILTIN_LIBM(tanhf) - #define simde_math_tanhf(v) __builtin_tanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanhf(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanhf(v) tanhf(v) - #endif -#endif - -#if !defined(simde_math_trunc) - #if SIMDE_MATH_BUILTIN_LIBM(trunc) - #define simde_math_trunc(v) __builtin_trunc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_trunc(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_trunc(v) trunc(v) - #endif -#endif - -#if !defined(simde_math_truncf) - #if SIMDE_MATH_BUILTIN_LIBM(truncf) - #define simde_math_truncf(v) __builtin_truncf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_truncf(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_truncf(v) truncf(v) - #endif -#endif - -/*** Comparison macros (which don't raise invalid errors) ***/ - -#if defined(isunordered) - #define simde_math_isunordered(x, y) isunordered(x, y) -#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) - #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) -#else - static HEDLEY_INLINE - int simde_math_isunordered(double x, double y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunordered simde_math_isunordered - - static HEDLEY_INLINE - int simde_math_isunorderedf(float x, float y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunorderedf simde_math_isunorderedf -#endif -#if !defined(simde_math_isunorderedf) - #define simde_math_isunorderedf simde_math_isunordered -#endif - -/*** Additional functions not in libm ***/ - -#if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) - static HEDLEY_INLINE - double - simde_math_cdfnorm(double x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const double a1 = 0.254829592; - static const double a2 = -0.284496736; 
[This hunk deletes the remainder of the vendored simde/simde-math.h. The removed code provided scalar fallbacks for the standard normal CDF, simde_math_cdfnorm and simde_math_cdfnormf (Abramowitz & Stegun formula 7.1.26 polynomial approximation); the inverse normal CDF, simde_math_cdfnorminv and simde_math_cdfnorminvf (a three-branch rational approximation, ltqnorm-style); the inverse error functions simde_math_erfinv/erfinvf (log-based approximation with constant 0.148290947...); their complementary counterparts simde_math_erfcinv/erfcinvf (erfinv in the mid-range, rational approximations in the tails); radian/degree conversion helpers (simde_math_rad2deg/deg2rad and the float variants); and the saturating add/subtract helpers simde_math_adds_* and simde_math_subs_* for signed and unsigned 8/16/32/64-bit integers, using AArch64 NEON scalar intrinsics where available and portable bit manipulation otherwise. The file ends here, and the deletion of simde/simde-constify.h begins with its SPDX MIT license header.]
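With the vendored approximations gone, any remaining caller that needs the standard normal CDF or quantile can fall back on the C++ standard library and, for the quantile, Rmath as exposed by Rcpp. The sketch below is illustrative only: the helper names are not part of the package, and it assumes Rcpp is on the include path for R::qnorm.

    #include <cmath>    // std::erfc, std::sqrt
    #include <Rcpp.h>   // R::qnorm, Rcpp's wrapper around Rmath's qnorm

    // Standard normal CDF via the exact identity Phi(x) = erfc(-x / sqrt(2)) / 2;
    // no vendored polynomial approximation is needed once <cmath> is available.
    inline double std_normal_cdf(double x) {
      return 0.5 * std::erfc(-x / std::sqrt(2.0));
    }

    // Standard normal quantile via Rmath (lower tail, probabilities on the
    // natural scale), standing in for the removed rational approximation.
    inline double std_normal_quantile(double p) {
      return R::qnorm(p, 0.0, 1.0, /* lower_tail = */ 1, /* log_p = */ 0);
    }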
[This hunk deletes simde/simde-constify.h in full. After the remainder of its MIT license text (Copyright 2020 Evan Nemerson), the file's own comment explains the mechanism: to call a function that requires an Integer Constant Expression (ICE) with a value the compiler only knows to be constant at the call site, it emits a do/while loop containing a switch with one case per admissible value, so the generated code matches what an ICE argument would have produced. The removed macros are SIMDE_CONSTIFY_2_/4_/8_/16_/32_/64_ (which assign the call's result) and the corresponding SIMDE_CONSTIFY_*_NO_RESULT_ variants, each enumerating cases 0..N-1 with a default_case fallback. The deletion of simde/simde-align.h then begins with its CC0 header comment, which describes a standalone alignment portability layer: macros ending in _TO take a numeric alignment, macros ending in _LIKE take a type, ASSUME macros are optimization hints, and MSVC imposes the main limitations.]
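For reference, the constification trick the removed header documents fits in a few lines: a switch with one case per admissible value forwards a runtime-constant argument into a context that demands a compile-time constant. The sketch below is purely illustrative (the do_op* names are hypothetical) and mirrors the shape of the removed SIMDE_CONSTIFY_4_ macro.

    // do_op_imm requires its argument as a compile-time constant; do_op accepts
    // a runtime value and expands one case per admissible immediate.
    template <int IMM> int do_op_imm(int a) { return a << IMM; }  // stand-in

    inline int do_op(int a, int imm) {
      int r = 0;
      switch (imm) {
        case 0: r = do_op_imm<0>(a); break;
        case 1: r = do_op_imm<1>(a); break;
        case 2: r = do_op_imm<2>(a); break;
        case 3: r = do_op_imm<3>(a); break;
        default: r = 0; break;  // out-of-range fallback, like default_case
      }
      return r;
    }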
[This hunk deletes the body of simde/simde-align.h. The removed code defined the SIMDE_ALIGN_SIZE_T_ and SIMDE_ALIGN_INTPTR_T_ fallback types; SIMDE_ALIGN_OF (alignof, _Alignof, __alignof__, __ALIGNOF__, or __alignof depending on the compiler); SIMDE_ALIGN_MAXIMUM and SIMDE_ALIGN_CAP, which cap alignment requests on older MSVC and XL C/C++; SIMDE_ALIGN_TO and the fixed SIMDE_ALIGN_TO_8/16/32/64 variants (attribute aligned, _Alignas, alignas, or __declspec(align)), illustrated in the removed comment with a struct i32x4 whose int32_t values[4] member is 16-byte aligned; SIMDE_ALIGN_ASSUME_TO (__builtin_assume_aligned, __assume_aligned, or std::assume_aligned, with an optional runtime check when SIMDE_ALIGN_DEBUG is defined); the type-based SIMDE_ALIGN_LIKE*/SIMDE_ALIGN_ASSUME_LIKE variants; and SIMDE_ALIGN_CAST/SIMDE_ALIGN_ASSUME_CAST, cast helpers that suppress -Wcast-align. The file ends here, and the surrounding amalgamated header continues with the SIMDE_FAST_* options, which trade strict API fidelity for speed and are enabled by -DSIMDE_FAST_MATH, the individual -DSIMDE_FAST_* defines, or -ffast-math/-ffinite-math-only.]
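On a C++11-or-newer compiler, the declaration side of those alignment macros reduces to the standard alignas/alignof keywords. A minimal illustration, echoing the i32x4 example from the removed header comment (the struct name is otherwise hypothetical):

    #include <cstdint>

    // What SIMDE_ALIGN_TO(16) amounts to in standard C++: request 16-byte
    // alignment for the member, which propagates to the enclosing struct.
    struct i32x4 {
      alignas(16) std::int32_t values[4];
    };
    static_assert(alignof(i32x4) == 16, "16-byte alignment honored");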
*/ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) - #define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) - #if defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_NANS - #elif defined(__FINITE_MATH_ONLY__) - #if __FINITE_MATH_ONLY__ - #define SIMDE_FAST_NANS - #endif - #endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_CONVERSION_RANGE -#endif - -/* Due to differences across platforms, sometimes it can be much - * faster for us to allow spurious floating point exceptions, - * or to no generate them when we should. 
*/ -#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_EXCEPTIONS -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) - #if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ - (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) - #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") - #else - #define SIMDE_REQUIRE_CONSTANT(arg) - #endif -#else - #define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) - /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which - * starts with a double-underscore. This is a system header so we have no - * control over it, but since it's a macro it will emit a diagnostic which - * prevents compilation with -Werror. */ - #if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ - _Static_assert(expr, message); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) - #endif -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) - #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#endif - -/* Statement exprs */ -#if \ - HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ - HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) -#endif - -/* This is just a convenience macro to make it easy to call a single - * function with a specific diagnostic disabled. 
*/ -#if defined(SIMDE_STATEMENT_EXPR_) - #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ - SIMDE_STATEMENT_EXPR_(({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - diagnostic \ - (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#endif - -#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) - #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") -#endif - -#if \ - (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) -# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -# define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -# if \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SCALAR -# define SIMDE_VECTOR_SUBSCRIPT -# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -# define SIMDE_VECTOR_SUBSCRIPT -# elif \ - HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# elif HEDLEY_HAS_ATTRIBUTE(vector_size) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SUBSCRIPT -# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) -# define SIMDE_VECTOR_SCALAR -# endif -# endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) - HEDLEY_DIAGNOSTIC_PUSH - /* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -# endif -# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif - -# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#else -# define SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_SAFELEN(l) -# define SIMDE_VECTORIZE_REDUCTION(r) -# define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if defined(SIMDE_NO_INLINE) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#elif defined(SIMDE_CONSTRAINED_COMPILATION) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static -#else -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if \ - HEDLEY_HAS_ATTRIBUTE(unused) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ - -#if defined(_MSC_VER) -# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -# define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -# define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -# define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -# define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# elif defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__s390x__) || defined(__zarch__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. 
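/* Illustrative sketch (not SIMDe code, not part of this patch): SIMDE_VECTORIZE above
 * expands to "#pragma omp simd" or a per-compiler loop hint so the scalar fallback loops
 * still auto-vectorize. Assuming the pragma is silently ignored when OpenMP SIMD support
 * is not enabled, the annotation looks like this on a plain loop: */
#include <stddef.h>
static inline double demo_dot(const double *x, const double *y, size_t n) {
    double sum = 0.0;
    #pragma omp simd reduction(+:sum)   /* vectorization hint only; no threading involved */
    for (size_t i = 0; i < n; i++)
        sum += x[i] * y[i];
    return sum;
}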
*/ -# elif defined(_WIN32) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(sun) || defined(__sun) /* Solaris */ -# include -# if defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__APPLE__) -# include -# if defined(__LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -# include -# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -# include -# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# endif -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) - #define simde_bswap64(v) _byteswap_uint64(v) -#else - SIMDE_FUNCTION_ATTRIBUTES - uint64_t - simde_bswap64(uint64_t v) { - return - ((v & (((uint64_t) 0xff) << 56)) >> 56) | - ((v & (((uint64_t) 0xff) << 48)) >> 40) | - ((v & (((uint64_t) 0xff) << 40)) >> 24) | - ((v & (((uint64_t) 0xff) << 32)) >> 8) | - ((v & (((uint64_t) 0xff) << 24)) << 8) | - ((v & (((uint64_t) 0xff) << 16)) << 24) | - ((v & (((uint64_t) 0xff) << 8)) << 40) | - ((v & (((uint64_t) 0xff) )) << 56); - } -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -# error Unknown byte order; please file a bug -#else -# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -# define simde_endian_bswap64_be(value) simde_bswap64(value) -# define simde_endian_bswap64_le(value) (value) -# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -# define simde_endian_bswap64_be(value) (value) -# define simde_endian_bswap64_le(value) simde_bswap64(value) -# endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. 
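/* Illustrative sketch (not SIMDe code, not part of this patch): the deleted simde_bswap64
 * fallback above reverses the eight bytes of a uint64_t with shifts and masks when no
 * __builtin_bswap64 is available. The same logic, written out standalone: */
#include <stdint.h>
static inline uint64_t demo_bswap64(uint64_t v) {
    return ((v & UINT64_C(0xff00000000000000)) >> 56) |
           ((v & UINT64_C(0x00ff000000000000)) >> 40) |
           ((v & UINT64_C(0x0000ff0000000000)) >> 24) |
           ((v & UINT64_C(0x000000ff00000000)) >>  8) |
           ((v & UINT64_C(0x00000000ff000000)) <<  8) |
           ((v & UINT64_C(0x0000000000ff0000)) << 24) |
           ((v & UINT64_C(0x000000000000ff00)) << 40) |
           ((v & UINT64_C(0x00000000000000ff)) << 56);
}
/* e.g. demo_bswap64(0x0102030405060708) == 0x0807060504030201 */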
*/ - -#if !defined(SIMDE_FLOAT32_TYPE) -# define SIMDE_FLOAT32_TYPE float -# define SIMDE_FLOAT32_C(value) value##f -#else -# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -# define SIMDE_FLOAT64_TYPE double -# define SIMDE_FLOAT64_C(value) value -#else -# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if defined(SIMDE_POLY8_TYPE) -# undef SIMDE_POLY8_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY8_TYPE poly8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value)) -#else -# define SIMDE_POLY8_TYPE uint8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value)) -#endif -typedef SIMDE_POLY8_TYPE simde_poly8; - -#if defined(SIMDE_POLY16_TYPE) -# undef SIMDE_POLY16_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY16_TYPE poly16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value)) -#else -# define SIMDE_POLY16_TYPE uint16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value)) -#endif -typedef SIMDE_POLY16_TYPE simde_poly16; - -#if defined(SIMDE_POLY64_TYPE) -# undef SIMDE_POLY64_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_POLY64_TYPE poly64_t -# define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull)) -#else -# define SIMDE_POLY64_TYPE uint64_t -# define SIMDE_POLY64_C(value) value ## ull -#endif -typedef SIMDE_POLY64_TYPE simde_poly64; - -#if defined(SIMDE_POLY128_TYPE) -# undef SIMDE_POLY128_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) -# define SIMDE_POLY128_TYPE poly128_t -# define SIMDE_POLY128_C(value) value -#elif defined(__SIZEOF_INT128__) -# define SIMDE_POLY128_TYPE __int128 -# define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value)) -#else -# define SIMDE_POLY128_TYPE uint64_t -# define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1 -#endif -typedef SIMDE_POLY128_TYPE simde_poly128; - -#if defined(__cplusplus) - typedef bool simde_bool; -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - typedef _Bool simde_bool; -#elif defined(bool) - typedef bool simde_bool; -#else - #include - typedef bool simde_bool; -#endif - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -# define SIMDE_CONVERT_FTOI(T,v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) \ - HEDLEY_DIAGNOSTIC_POP -#else -# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) -#else - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -# define 
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -# define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -# define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -# if \ - defined(HEDLEY_PGI_VERSION) || \ - defined(HEDLEY_MSVC_VERSION) -# define SIMDE_STDC_HOSTED 1 -# else -# define SIMDE_STDC_HOSTED 0 -# endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) - #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) - #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) - #endif -#endif -#if !defined(simde_memset) - #if HEDLEY_HAS_BUILTIN(__builtin_memset) - #define simde_memset(s, c, n) __builtin_memset(s, c, n) - #endif -#endif -#if !defined(simde_memcmp) - #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) - #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) - #endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) - #if !defined(SIMDE_NO_STRING_H) - #if defined(__has_include) - #if !__has_include() - #define SIMDE_NO_STRING_H - #endif - #elif (SIMDE_STDC_HOSTED == 0) - #define SIMDE_NO_STRING_H - #endif - #endif - - #if !defined(SIMDE_NO_STRING_H) - #include - #if !defined(simde_memcpy) - #define simde_memcpy(dest, src, n) memcpy(dest, src, n) - #endif - #if !defined(simde_memset) - #define simde_memset(s, c, n) memset(s, c, n) - #endif - #if !defined(simde_memcmp) - #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) - #endif - #else - /* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. 
*/ - #if !defined(simde_memcpy) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memcpy_(void* dest, const void* src, size_t len) { - char* dest_ = HEDLEY_STATIC_CAST(char*, dest); - char* src_ = HEDLEY_STATIC_CAST(const char*, src); - for (size_t i = 0 ; i < len ; i++) { - dest_[i] = src_[i]; - } - } - #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) - #endif - - #if !defined(simde_memset) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memset_(void* s, int c, size_t len) { - char* s_ = HEDLEY_STATIC_CAST(char*, s); - char c_ = HEDLEY_STATIC_CAST(char, c); - for (size_t i = 0 ; i < len ; i++) { - s_[i] = c_[i]; - } - } - #define simde_memset(s, c, n) simde_memset_(s, c, n) - #endif - - #if !defined(simde_memcmp) - SIMDE_FUCTION_ATTRIBUTES - int - simde_memcmp_(const void *s1, const void *s2, size_t n) { - unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); - unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); - for (size_t i = 0 ; i < len ; i++) { - if (s1_[i] != s2_[i]) { - return (int) (s1_[i] - s2_[i]); - } - } - return 0; - } - #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) - #endif - #endif -#endif - -/*** Functions that quiet a signaling NaN ***/ - -static HEDLEY_INLINE -double -simde_math_quiet(double x) { - uint64_t tmp, mask; - if (!simde_math_isnan(x)) { - return x; - } - simde_memcpy(&tmp, &x, 8); - mask = 0x7ff80000; - mask <<= 32; - tmp |= mask; - simde_memcpy(&x, &tmp, 8); - return x; -} - -static HEDLEY_INLINE -float -simde_math_quietf(float x) { - uint32_t tmp; - if (!simde_math_isnanf(x)) { - return x; - } - simde_memcpy(&tmp, &x, 4); - tmp |= 0x7fc00000lu; - simde_memcpy(&x, &tmp, 4); - return x; -} - -#if defined(FE_ALL_EXCEPT) - #define SIMDE_HAVE_FENV_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_FENV_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_FENV_H -#endif - -#if defined(EXIT_FAILURE) - #define SIMDE_HAVE_STDLIB_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_STDLIB_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_STDLIB_H -#endif - -#if defined(__has_include) -# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -# include -# elif __has_include() -# include -# endif -# if __has_include() -# include -# endif -#elif SIMDE_STDC_HOSTED == 1 -# include -# include -#endif - -#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ - static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ - T_To \ - Name (T_From value) { \ - T_To r; \ - simde_memcpy(&r, &value, sizeof(r)); \ - return r; \ - } - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/check.h :: */ -/* Check (assertions) - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
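/* Illustrative sketch (not SIMDe code, not part of this patch): simde_math_quietf above
 * converts a signalling NaN into a quiet one by OR-ing in the quiet-NaN bit through
 * memcpy type punning, avoiding pointer-cast aliasing problems. Assuming IEEE-754 single
 * precision: */
#include <math.h>
#include <stdint.h>
#include <string.h>
static inline float demo_quietf(float x) {
    uint32_t bits;
    if (!isnan(x)) return x;                 /* only NaNs are touched */
    memcpy(&bits, &x, sizeof bits);
    bits |= UINT32_C(0x7fc00000);            /* exponent all ones + top (quiet) mantissa bit */
    memcpy(&x, &bits, sizeof bits);
    return x;
}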
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -#if !defined(_WIN32) -# define SIMDE_SIZE_MODIFIER "z" -# define SIMDE_CHAR_MODIFIER "hh" -# define SIMDE_SHORT_MODIFIER "h" -#else -# if defined(_M_X64) || defined(__amd64__) -# define SIMDE_SIZE_MODIFIER "I64" -# else -# define SIMDE_SIZE_MODIFIER "" -# endif -# define SIMDE_CHAR_MODIFIER "" -# define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) -# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ -# define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -# if defined(__has_include) -# if __has_include() -# include -# endif -# elif defined(SIMDE_STDC_HOSTED) -# if SIMDE_STDC_HOSTED == 1 -# include -# endif -# elif defined(__STDC_HOSTED__) -# if __STDC_HOSTETD__ == 1 -# include -# endif -# endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/debug-trap.h :: */ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define simde_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define simde_trap() __debugbreak() -# endif -#endif -#if !defined(simde_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define simde_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define simde_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define simde_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void simde_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define simde_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define simde_trap() raise(SIGTRAP) -# else -# define simde_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -# define simde_dbg_assert(expr) do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -# define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ -/* :: End simde/debug-trap.h :: */ - - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -# if defined(EOF) -# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) -# else -# define simde_errorf(format, ...) (simde_trap()) -# endif - HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -# if defined(SIMDE_CHECK_FAIL_DEFINED) -# define simde_assert(expr) -# else -# if defined(HEDLEY_ASSUME) -# define simde_assert(expr) HEDLEY_ASSUME(expr) -# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) -# define simde_assert(expr) ((void) (!!(expr) ? 
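/* Illustrative sketch (not SIMDe code, not part of this patch): the deleted debug-trap
 * header walks a long per-compiler cascade of ways to break into a debugger. The two
 * ends of that cascade, assuming a POSIX signal fallback; demo_trap is illustrative: */
#include <signal.h>
#if defined(__has_builtin)
#  if __has_builtin(__builtin_debugtrap)
#    define demo_trap() __builtin_debugtrap()   /* Clang-style breakpoint builtin */
#  endif
#endif
#if !defined(demo_trap)
#  define demo_trap() raise(SIGTRAP)            /* portable fallback: stop under a debugger */
#endif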
1 : (__builtin_unreachable(), 1))) -# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) -# define simde_assert(expr) __assume(expr) -# else -# define simde_assert(expr) -# endif -# endif -# define simde_assert_true(expr) simde_assert(expr) -# define simde_assert_false(expr) simde_assert(!(expr)) -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) -# define simde_assert_double_equal(a, b, precision) -# define simde_assert_string_equal(a, b) -# define simde_assert_string_not_equal(a, b) -# define simde_assert_memory_equal(size, a, b) -# define simde_assert_memory_not_equal(size, a, b) -#else -# define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ - -(simde_tmp_a_ - simde_tmp_b_) : \ - (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# include -# define simde_assert_string_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ - simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_string_not_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ - simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ - simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) \ - simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) \ - simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) \ - simde_assert_type(float, "f", a, op, b) 
-#define simde_assert_double(a, op, b) \ - simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void*, "p", a, op, b) - -#define simde_assert_int8(a, op, b) \ - simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) \ - simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) \ - simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) \ - simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ -/* :: End simde/check.h :: */ - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether the operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * we use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long lonsg are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long. 
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
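/* Illustrative sketch (not SIMDe code, not part of this patch): the deleted block above
 * maps fixed-width integer types onto the int/long/long long suffixes used by GCC
 * builtins, because e.g. int64_t is long on LP64 platforms but long long on LLP64 ones.
 * A hand-rolled version of the same idea for a 64-bit popcount, assuming GCC/Clang
 * builtins; demo_popcount64 is illustrative only: */
#include <stdint.h>
#include <limits.h>
static inline int demo_popcount64(uint64_t v) {
#if ULONG_MAX >= UINT64_MAX
    return __builtin_popcountl((unsigned long) v);        /* LP64: long holds 64 bits   */
#else
    return __builtin_popcountll((unsigned long long) v);  /* LLP64 (e.g. Windows)       */
#endif
}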
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. 
- * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - #include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; - #endif - - #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; - #endif - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; - #endif - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; - #endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde__m64_from_private(simde__m64_private v) { - simde__m64 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64_private -simde__m64_to_private(simde__m64 v) { - simde__m64_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ - SIMDE_FUNCTION_ATTRIBUTES \ - simde__##simde_type \ - simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ - simde__##simde_type##_private r_; \ - r_.isax##_##fragment = value; \ - return simde__##simde_type##_from_private(r_); \ - } \ - \ - SIMDE_FUNCTION_ATTRIBUTES \ - source_type \ - simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ - simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ - return r_.isax##_##fragment; \ - } - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) -#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) -# define _m_paddb(a, b) simde_m_paddb(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_add_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) -# define _m_paddw(a, b) simde_mm_add_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) -# define _m_paddd(a, b) simde_mm_add_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { - r_.i8[i] = INT8_MAX; - } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { - r_.i8[i] = INT8_MIN; - } else { - r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) -# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, 
b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) -# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_and_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - r_.i64[0] = a_.i64[0] & b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -# define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = 
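/* Illustrative sketch (not SIMDe code, not part of this patch): the deleted _mm_adds_*
 * fallbacks above clamp each lane on overflow instead of wrapping. The same saturating
 * add for a single signed 8-bit lane: */
#include <stdint.h>
static inline int8_t demo_adds_i8(int8_t a, int8_t b) {
    int16_t wide = (int16_t)((int16_t) a + (int16_t) b);  /* widen so the sum cannot overflow */
    if (wide > INT8_MAX) return INT8_MAX;                 /* clamp positive overflow */
    if (wide < INT8_MIN) return INT8_MIN;                 /* clamp negative overflow */
    return (int8_t) wide;
}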
simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) -# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) -# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) -# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) -# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) -# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) -# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtm64_si64 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtm64_si64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i64[0]; - #endif - #endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -# define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi32_si64 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[2] = { a, 0 }; - r_.neon_i32 = vld1_s32(av); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -# define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi64_m64 (int64_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtsi64_m64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); - #else - r_.i64[0] = a; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi64_si32 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_empty (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); - #else - /* noop */ - #endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_empty() simde_mm_empty() -# define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_or_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - r_.i64[0] = a_.i64[0] | b_.i64[0]; 
- #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -# define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to 
UINT8_MAX */ - const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); - - /* Elements which are within the acceptable range */ - const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); - const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); - - /* Final values as 16-bit integers */ - const int16x8_t values = vorrq_s16(le_max, gt_max); - - r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else if (a_.i16[i] < 0) { - r_.u8[i] = 0; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] > UINT8_MAX) { - r_.u8[i + 4] = UINT8_MAX; - } else if (b_.i16[i] < 0) { - r_.u8[i + 4] = 0; - } else { - r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) -# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i8 = vld1_s8(v); - #else - r_.i8[0] = e0; - r_.i8[1] = e1; - r_.i8[2] = e2; - r_.i8[3] = e3; - r_.i8[4] = e4; - r_.i8[5] = e5; - r_.i8[6] = e6; - r_.i8[7] = e7; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m64_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi8( - HEDLEY_STATIC_CAST(int8_t, e7), - HEDLEY_STATIC_CAST(int8_t, e6), - HEDLEY_STATIC_CAST(int8_t, e5), - HEDLEY_STATIC_CAST(int8_t, e4), - HEDLEY_STATIC_CAST(int8_t, e3), - HEDLEY_STATIC_CAST(int8_t, e2), - HEDLEY_STATIC_CAST(int8_t, e1), - HEDLEY_STATIC_CAST(int8_t, e0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u8 = vld1_u8(v); - #else - r_.u8[0] = e0; - r_.u8[1] = e1; - r_.u8[2] = e2; - r_.u8[3] = e3; - r_.u8[4] = e4; - r_.u8[5] = e5; - r_.u8[6] = e6; - r_.u8[7] = e7; - #endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi16(e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; - r_.neon_i16 = vld1_s16(v); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - #endif - - return 
simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi16( - HEDLEY_STATIC_CAST(int16_t, e3), - HEDLEY_STATIC_CAST(int16_t, e2), - HEDLEY_STATIC_CAST(int16_t, e1), - HEDLEY_STATIC_CAST(int16_t, e0) - ); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; - r_.neon_u16 = vld1_u16(v); -#else - r_.u16[0] = e0; - r_.u16[1] = e1; - r_.u16[2] = e2; - r_.u16[3] = e3; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32( - HEDLEY_STATIC_CAST(int32_t, e1), - HEDLEY_STATIC_CAST(int32_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; - r_.neon_u32 = vld1_u32(v); -#else - r_.u32[0] = e0; - r_.u32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi32 (int32_t e1, int32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32(e1, e0); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; - r_.neon_i32 = vld1_s32(v); -#else - r_.i32[0] = e0; - r_.i32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pi64 (int64_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; - r_.neon_i64 = vld1_s64(v); -#else - r_.i64[0] = e0; -#endif - - return simde__m64_from_private(r_); -} - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; - r_.neon_f32 = vld1_f32(v); -#else - r_.f32[0] = e0; - r_.f32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi8 (int8_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi8(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i8 = vmov_n_s8(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi8(a, a, a, a, a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi16 (int16_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi16(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i16 = vmov_n_s16(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi16(a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi32 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi32(a); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i32 = vmov_n_s32(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi16(e3, e2, e1, e0); - #else - return simde_mm_set_pi16(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi32 (int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi32(e1, e0); - #else - return simde_mm_set_pi32(e0, e1); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setzero_si64 (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setzero_si64(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_u32 = vmov_n_u32(0); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(0, 0); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_si64() simde_mm_setzero_si64() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_load_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_loadu_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(mem_addr, &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_setone_si64 (void) { - return simde_mm_set1_pi32(~INT32_C(0)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) 
- return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) -# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) -# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psllh_s(a_.mmi_i16, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) -# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi32(a, count); - #else - simde__m64_private r_; - 
simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) -# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_slli_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); - #else - r_.u64[0] = a_.u64[0] << count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) -# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 << count_.i64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] << count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) -# define _m_psllq(a, count) simde_mm_sll_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) - return simde_mm_setzero_si64(); - - r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { - r_.u16[i] = a_.u16[i] >> count_.u64[0]; - } - 
#endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) -# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { - r_.u32[i] = a_.u32[i] >> count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) -# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) -# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) -# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_si64(a, count); 
- #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> count; - #else - r_.u64[0] = a_.u64[0] >> count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) -# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 >> count_.u64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] >> count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) -# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrah_s(a_.mmi_i16, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) -# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psraw_s(a_.mmi_i32, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) 
-# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) -# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int32_t cnt = (count_.u64[0] > 31) ? 31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) -# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) -# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); - #elif 
defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) -# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) -# define _m_psubd(a, b) simde_mm_sub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { - r_.i8[i] = INT8_MIN; - } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) -# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const int32_t x = a_.u8[i] - b_.u8[i]; - if (x < 0) { - r_.u8[i] = 0; - } else if (x > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) -# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { - r_.i16[i] = SHRT_MIN; - } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) -# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - const int x = a_.u16[i] - b_.u16[i]; - if (x < 0) { - r_.u16[i] = 0; - } else if (x > UINT16_MAX) { - r_.u16[i] = UINT16_MAX; - } else { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) -# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); - #else - r_.i8[0] = a_.i8[4]; - r_.i8[1] = b_.i8[4]; - r_.i8[2] = a_.i8[5]; - r_.i8[3] = b_.i8[5]; - r_.i8[4] = a_.i8[6]; - r_.i8[5] = b_.i8[6]; - r_.i8[6] = a_.i8[7]; - r_.i8[7] = b_.i8[7]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) -# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); - #else - r_.i16[0] = a_.i16[2]; - r_.i16[1] = b_.i16[2]; - r_.i16[2] = a_.i16[3]; - r_.i16[3] = b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) -# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[1]; - r_.i32[1] = b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) -# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); - #else - r_.i8[0] = a_.i8[0]; - r_.i8[1] = b_.i8[0]; - r_.i8[2] = a_.i8[1]; - r_.i8[3] = b_.i8[1]; - r_.i8[4] = a_.i8[2]; - r_.i8[5] = b_.i8[2]; - r_.i8[6] = a_.i8[3]; - r_.i8[7] = b_.i8[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) -# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = 
[Deleted vendored SIMDE sources, continued: the tail of simde/x86/mmx.h (_mm_unpacklo_pi16, _mm_unpacklo_pi32, _mm_xor_si64, _m_to_int and their MMX aliases); simde/simde-f16.h in full (MIT license header, selection of the simde_float16 representation, float16/float32 conversion routines based on Fabian Giesen's CC0 code, and half-precision classification helpers); and the opening of simde/x86/sse.h (the simde__m128 union and typedef with NEON/AltiVec/WASM/LSX views, the MXCSR rounding/exception/flush-to-zero constants and accessors, _MM_GET_ROUNDING_MODE/_MM_SET_ROUNDING_MODE, _mm_getcsr/_mm_setcsr, _mm_round_ps, _mm_set_ps/_mm_set_ps1, _mm_move_ss, _mm_add_ps/_mm_add_ss, the bitwise and/andnot/or/xor and internal not/select/broadcastlow/abs helpers, _mm_avg_pu8/_mm_avg_pu16, and the _mm_cmpeq/cmpge/cmpgt/cmple/cmplt/cmpneq _ps and _ss comparisons). The deletion of simde/x86/sse.h continues below.]
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vandq_u32(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpunord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpunord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] == b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] >= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] > b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] <= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] < b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] != b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { - simde__m128_private - r_, - dest_ = simde__m128_to_private(dest), - src_ = simde__m128_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); - r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t sign_pos = wasm_f32x4_splat(-0.0f); - r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); - #else - r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); - r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; - r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); - #elif defined(SIMDE_IEEE754_STORAGE) - (void) src_; - (void) dest_; - simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); - r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { - return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_pi2ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); - #else - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_si2ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - r_.i32[1] = a_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvt_ss2si (simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_ss2si(a); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); - #else - simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.i16[i]; - r_.f32[i] = v; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32x2_ps(a, b); - #else - simde__m128_private r_; - simde__m64_private - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); - SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); - #else - r_.f32[0] = (simde_float32) a_.i32[0]; - r_.f32[1] = (simde_float32) a_.i32[1]; - r_.f32[2] = (simde_float32) b_.i32[0]; - r_.f32[3] = (simde_float32) b_.i32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_cvtpi8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); - r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); - r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi16 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi16(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi32(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi8 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi8(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) - /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to - * i16, combine with an all-zero vector of i16 (which will become the upper - * half), narrow to i8. 
*/ - float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); - float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); - float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); - r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) - r_.i8[i] = INT8_MAX; - else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) - r_.i8[i] = INT8_MIN; - else - r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); - } - /* Note: the upper half is undefined */ - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.u16[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtsi32_ss(a, b); - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_ss(a, b); - #else - return _mm_cvtsi64x_ss(a, b); - #endif - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - 
return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm_cvtss_f32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtss_f32(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_f32(a_.neon_f32, 0); - #else - return a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtss_si32 (simde__m128 a) { - return simde_mm_cvt_ss2si(a); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtss_si64(a); - #else - return _mm_cvtss_si64x(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); - #else - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) -# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtt_ss2si (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtt_ss2si(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - simde_float32 v = a_.f32[0]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, v); - #endif - #endif - #endif -} -#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) -# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) - #if defined(__PGI) - return _mm_cvttss_si64x(a); - #else - return _mm_cvttss_si64(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); - float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); - r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) - r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = a_.f32[0] / b_.f32[0]; - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_mm_extract_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private a_ = simde__m64_to_private(a); - return a_.i16[imm8]; -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) -#endif -#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) -# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private - a_ = simde__m64_to_private(a); - - a_.i16[imm8] = i; - - return simde__m64_from_private(a_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) -#endif -#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps(mem_addr); -#else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); - #endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load1_ps (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps1(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_dup_f32(mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); - #else - r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ss (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ss(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); - #else - r_.f32[0] = *mem_addr; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); - #else - simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -/* The SSE documentation says that there are no alignment requirements - for mem_addr. Unfortunately they used the __m64 type for the argument - which is supposed to be 8-byte aligned, so some compilers (like clang - with -Wcast-align) will generate a warning if you try to cast, say, - a simde_float32* to a simde__m64* for this function. - - I think the choice of argument type is unfortunate, but I do think we - need to stick to it here. 
If there is demand I can always add something - like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vld1_f32( - HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); - #else - simde__m64_private b_; - simde_memcpy(&b_, mem_addr, sizeof(b_)); - r_.i32[0] = b_.i32[0]; - r_.i32[1] = b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadr_ps(mem_addr); - #else - simde__m128_private - r_, - v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrev64q_f32(v_.neon_f32); - r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_reve(v_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); - #else - r_.f32[0] = v_.f32[3]; - r_.f32[1] = v_.f32[2]; - r_.f32[2] = v_.f32[1]; - r_.f32[3] = v_.f32[0]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadu_ps(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m64_private - a_ = simde__m64_to_private(a), - mask_ = simde__m64_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) - if (mask_.i8[i] < 0) - mem_addr[i] = a_.i8[i]; - #endif -} -#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) -# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) - r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); - #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) - r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) -# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) -# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - #if defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); - #else - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); - #endif - #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); - r_.f32 = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.f32), - ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | - (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) - ) - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) -# define _m_pminub(a, b) simde_mm_min_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movehl_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vzip2q_u64(b_.neon_u64, a_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a32 = vget_high_f32(a_.neon_f32); - float32x2_t b32 = vget_high_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(b32, a32); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergel(b_.altivec_i64, a_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); - #else - r_.f32[0] = b_.f32[2]; - r_.f32[1] = b_.f32[3]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movelh_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = 
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] == b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] == b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] >= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] >= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] > b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] > b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] <= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] <= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] < b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] < b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] != b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] != b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) -# if defined(__has_builtin) -# if __has_builtin(__builtin_ia32_undef128) -# define SIMDE_HAVE_UNDEFINED128 -# endif -# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) -# define SIMDE_HAVE_UNDEFINED128 -# endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpackhi_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_high_f32(a_.neon_f32); - float32x2_t b1 = vget_high_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = 
__lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpacklo_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_low_f32(a_.neon_f32); - float32x2_t b1 = vget_low_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) || \ - defined(SIMDE_VECTOR_SUBSCRIPT)) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private a_ = simde__m64_to_private(a); - vst1_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), a_.neon_i64); - #else - simde__m64_private* - dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), - a_ = simde__m64_to_private(a); - - dest->i64[0] = a_.i64[0]; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || defined(SIMDE_LOONGARCH_LSX_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_ASSUME_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_ps(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_ps(mem_addr, a) 
simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ - row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - } while (0) -#else - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ - SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ - row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ - row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ - row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ - row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ - } while (0) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE_H) */ -/* :: End simde/x86/sse.h :: */ diff --git a/src/simde/x86/sse2.h b/src/simde/x86/sse2.h deleted file mode 100644 index 16f293332..000000000 --- a/src/simde/x86/sse2.h +++ /dev/null @@ -1,23815 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - * 2017 Hasindu Gamaarachchi - * 2018 Jeff Daily - */ - -#if !defined(SIMDE_X86_SSE2_H) -#define SIMDE_X86_SSE2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - */ - -#if !defined(SIMDE_X86_SSE_H) -#define SIMDE_X86_SSE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/mmx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_MMX_H) -#define SIMDE_X86_MMX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-common.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_COMMON_H) -#define SIMDE_COMMON_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/hedley.h :: */ -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . 
- * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) -#if defined(HEDLEY_VERSION) -# undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 16 - -#if defined(HEDLEY_STRINGIFY_EX) -# undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -# undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -# undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(HEDLEY_CONCAT) -# undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) - -#if defined(HEDLEY_CONCAT3_EX) -# undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(HEDLEY_CONCAT3) -# undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(HEDLEY_VERSION_ENCODE) -# undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -# undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -# undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -# undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -# undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -# undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -# undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -# undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(HEDLEY_INTEL_VERSION) -# undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -# undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -# undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) -# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -# undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -# undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -# undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -# undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -# undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_ARM_VERSION) -# undef HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(HEDLEY_ARM_VERSION_CHECK) -# undef HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(HEDLEY_ARM_VERSION) -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IBM_VERSION) -# undef HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(HEDLEY_IBM_VERSION_CHECK) -# undef HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(HEDLEY_IBM_VERSION) -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_VERSION) -# undef HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -# if (__TI_COMPILER_VERSION__ >= 16000000) -# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -# endif -#endif - -#if defined(HEDLEY_TI_VERSION_CHECK) -# undef HEDLEY_TI_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_VERSION) -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION) -# undef HEDLEY_TI_CL2000_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) -# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) -# undef HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL2000_VERSION) -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION) -# undef HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) -# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
-# undef HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL430_VERSION) -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION) -# undef HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) -# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) -# undef HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_ARMCL_VERSION) -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION) -# undef HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) -# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) -# undef HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL6X_VERSION) -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION) -# undef HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) -# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) -# undef HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL7X_VERSION) -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION) -# undef HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) -# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) -# undef HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CLPRU_VERSION) -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_CRAY_VERSION) -# undef HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) -# if defined(_RELEASE_PATCHLEVEL) -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) -# else -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) -# endif -#endif - -#if defined(HEDLEY_CRAY_VERSION_CHECK) -# undef HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(HEDLEY_CRAY_VERSION) -# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IAR_VERSION) -# undef HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) -# if __VER__ > 1000 -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) -# else -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) -# endif -#endif - -#if defined(HEDLEY_IAR_VERSION_CHECK) -# undef HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(HEDLEY_IAR_VERSION) -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TINYC_VERSION) -# undef HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) -# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) -#endif - -#if defined(HEDLEY_TINYC_VERSION_CHECK) -# undef HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_DMC_VERSION) -# undef HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) -# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if defined(HEDLEY_DMC_VERSION_CHECK) -# undef HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(HEDLEY_DMC_VERSION) -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION) -# undef HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) -# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION_CHECK) -# undef HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(HEDLEY_COMPCERT_VERSION) -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PELLES_VERSION) -# undef HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) -# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if defined(HEDLEY_PELLES_VERSION_CHECK) -# undef HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(HEDLEY_PELLES_VERSION) -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION) -# undef HEDLEY_MCST_LCC_VERSION -#endif -#if defined(__LCC__) && defined(__LCC_MINOR__) -# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) -# undef HEDLEY_MCST_LCC_VERSION_CHECK -#endif -#if defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_GCC_VERSION) -# undef HEDLEY_GCC_VERSION 
-#endif -#if \ - defined(HEDLEY_GNUC_VERSION) && \ - !defined(__clang__) && \ - !defined(HEDLEY_INTEL_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_ARM_VERSION) && \ - !defined(HEDLEY_CRAY_VERSION) && \ - !defined(HEDLEY_TI_VERSION) && \ - !defined(HEDLEY_TI_ARMCL_VERSION) && \ - !defined(HEDLEY_TI_CL430_VERSION) && \ - !defined(HEDLEY_TI_CL2000_VERSION) && \ - !defined(HEDLEY_TI_CL6X_VERSION) && \ - !defined(HEDLEY_TI_CL7X_VERSION) && \ - !defined(HEDLEY_TI_CLPRU_VERSION) && \ - !defined(__COMPCERT__) && \ - !defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION -#endif - -#if defined(HEDLEY_GCC_VERSION_CHECK) -# undef HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(HEDLEY_GCC_VERSION) -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_HAS_ATTRIBUTE) -# undef HEDLEY_HAS_ATTRIBUTE -#endif -#if \ - defined(__has_attribute) && \ - ( \ - (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ - ) -# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if \ - defined(__has_cpp_attribute) && \ - defined(__cplusplus) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) -# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#elif \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_BUILTIN) -# undef HEDLEY_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) -#else -# define HEDLEY_HAS_BUILTIN(builtin) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_BUILTIN) -# undef HEDLEY_GNUC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_BUILTIN) -# undef HEDLEY_GCC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_FEATURE) -# undef HEDLEY_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) -#else -# define HEDLEY_HAS_FEATURE(feature) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_FEATURE) -# undef HEDLEY_GNUC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_FEATURE) -# undef HEDLEY_GCC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_EXTENSION) -# undef HEDLEY_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) -#else -# define HEDLEY_HAS_EXTENSION(extension) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_EXTENSION) -# undef HEDLEY_GNUC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_EXTENSION) -# undef HEDLEY_GCC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) -#else -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_WARNING) -# undef HEDLEY_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) -#else -# define HEDLEY_HAS_WARNING(warning) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_WARNING) -# undef HEDLEY_GNUC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_WARNING) -# undef HEDLEY_GCC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - defined(__clang__) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ - HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ - (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) -# define HEDLEY_PRAGMA(value) _Pragma(#value) -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_PRAGMA(value) __pragma(value) -#else -# define HEDLEY_PRAGMA(value) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_PUSH) -# undef HEDLEY_DIAGNOSTIC_PUSH -#endif -#if defined(HEDLEY_DIAGNOSTIC_POP) -# undef HEDLEY_DIAGNOSTIC_POP -#endif -#if defined(__clang__) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) -# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) -#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#else -# 
define HEDLEY_DIAGNOSTIC_PUSH -# define HEDLEY_DIAGNOSTIC_POP -#endif - -/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wc++98-compat") -# if HEDLEY_HAS_WARNING("-Wc++17-extensions") -# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# endif -#endif -#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x -#endif - -#if defined(HEDLEY_CONST_CAST) -# undef HEDLEY_CONST_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) -#elif \ - HEDLEY_HAS_WARNING("-Wcast-qual") || \ - HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_REINTERPRET_CAST) -# undef HEDLEY_REINTERPRET_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) -#else -# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_STATIC_CAST) -# undef HEDLEY_STATIC_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) -#else -# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_CPP_CAST) -# undef HEDLEY_CPP_CAST -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wold-style-cast") -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ - ((T) (expr)) \ - HEDLEY_DIAGNOSTIC_POP -# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("diag_suppress=Pe137") \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) -# endif -#else -# define HEDLEY_CPP_CAST(T, expr) (expr) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) -# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif -#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-attributes") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") -#elif \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif -#if HEDLEY_HAS_WARNING("-Wcast-qual") -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif -#if HEDLEY_HAS_WARNING("-Wunused-function") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif - -#if defined(HEDLEY_DEPRECATED) -# undef HEDLEY_DEPRECATED -#endif -#if defined(HEDLEY_DEPRECATED_FOR) -# undef HEDLEY_DEPRECATED_FOR -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) -# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) -#elif \ - (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) -#elif defined(__cplusplus) && (__cplusplus >= 201402L) -# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) -# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(deprecated) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") -# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") -#else -# define HEDLEY_DEPRECATED(since) -# define HEDLEY_DEPRECATED_FOR(since, replacement) -#endif - -#if defined(HEDLEY_UNAVAILABLE) -# undef HEDLEY_UNAVAILABLE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warning) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) -#else -# define HEDLEY_UNAVAILABLE(available_since) -#endif - -#if defined(HEDLEY_WARN_UNUSED_RESULT) -# undef HEDLEY_WARN_UNUSED_RESULT -#endif -#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) -# undef HEDLEY_WARN_UNUSED_RESULT_MSG -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) -#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#elif defined(_Check_return_) /* SAL */ -# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ -#else -# define HEDLEY_WARN_UNUSED_RESULT -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) -#endif - -#if defined(HEDLEY_SENTINEL) -# undef HEDLEY_SENTINEL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(sentinel) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) -#else -# define HEDLEY_SENTINEL(position) -#endif - -#if defined(HEDLEY_NO_RETURN) -# undef HEDLEY_NO_RETURN -#endif -#if HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NO_RETURN __noreturn -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -# define HEDLEY_NO_RETURN _Noreturn -#elif defined(__cplusplus) && (__cplusplus >= 201103L) -# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(noreturn) || \ - HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_NO_RETURN _Pragma("does_not_return") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NO_RETURN __attribute((noreturn)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#else -# define HEDLEY_NO_RETURN -#endif - -#if defined(HEDLEY_NO_ESCAPE) -# undef HEDLEY_NO_ESCAPE -#endif -#if HEDLEY_HAS_ATTRIBUTE(noescape) -# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) -#else -# define HEDLEY_NO_ESCAPE -#endif - -#if defined(HEDLEY_UNREACHABLE) -# undef HEDLEY_UNREACHABLE -#endif -#if defined(HEDLEY_UNREACHABLE_RETURN) -# undef HEDLEY_UNREACHABLE_RETURN -#endif -#if defined(HEDLEY_ASSUME) -# undef HEDLEY_ASSUME -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ASSUME(expr) __assume(expr) -#elif HEDLEY_HAS_BUILTIN(__builtin_assume) -# define HEDLEY_ASSUME(expr) __builtin_assume(expr) -#elif \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# if defined(__cplusplus) -# define HEDLEY_ASSUME(expr) std::_nassert(expr) -# else -# define HEDLEY_ASSUME(expr) _nassert(expr) -# endif -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNREACHABLE() __builtin_unreachable() -#elif defined(HEDLEY_ASSUME) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif -#if !defined(HEDLEY_ASSUME) -# if defined(HEDLEY_UNREACHABLE) -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) -# else -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) -# endif -#endif -#if defined(HEDLEY_UNREACHABLE) -# if \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) -# else -# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() -# endif -#else -# define HEDLEY_UNREACHABLE_RETURN(value) return (value) -#endif -#if !defined(HEDLEY_UNREACHABLE) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wpedantic") -# pragma clang diagnostic ignored "-Wpedantic" -#endif -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif -#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) -# if defined(__clang__) -# pragma clang diagnostic ignored "-Wvariadic-macros" -# elif defined(HEDLEY_GCC_VERSION) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif -#endif -#if defined(HEDLEY_NON_NULL) -# undef HEDLEY_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) -#else -# define HEDLEY_NON_NULL(...) 
-#endif -HEDLEY_DIAGNOSTIC_POP - -#if defined(HEDLEY_PRINTF_FORMAT) -# undef HEDLEY_PRINTF_FORMAT -#endif -#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) -#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) -#elif \ - HEDLEY_HAS_ATTRIBUTE(format) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) -#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) -#else -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) -#endif - -#if defined(HEDLEY_CONSTEXPR) -# undef HEDLEY_CONSTEXPR -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) -# endif -#endif -#if !defined(HEDLEY_CONSTEXPR) -# define HEDLEY_CONSTEXPR -#endif - -#if defined(HEDLEY_PREDICT) -# undef HEDLEY_PREDICT -#endif -#if defined(HEDLEY_LIKELY) -# undef HEDLEY_LIKELY -#endif -#if defined(HEDLEY_UNLIKELY) -# undef HEDLEY_UNLIKELY -#endif -#if defined(HEDLEY_UNPREDICTABLE) -# undef HEDLEY_UNPREDICTABLE -#endif -#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) -# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) -# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) -# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) -# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) -# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) -#elif \ - (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - 
[... vendored SIMDe header deletion, remainder of simde/hedley.h: HEDLEY_PREDICT / HEDLEY_LIKELY / HEDLEY_UNLIKELY branch-prediction hints, HEDLEY_MALLOC, HEDLEY_PURE, HEDLEY_CONST, HEDLEY_RESTRICT, HEDLEY_INLINE / HEDLEY_ALWAYS_INLINE / HEDLEY_NEVER_INLINE, symbol-visibility macros (HEDLEY_PRIVATE / HEDLEY_PUBLIC / HEDLEY_IMPORT), HEDLEY_NO_THROW, HEDLEY_FALL_THROUGH, HEDLEY_RETURNS_NON_NULL, HEDLEY_ARRAY_PARAM, HEDLEY_IS_CONSTANT / HEDLEY_REQUIRE_CONSTEXPR, HEDLEY_BEGIN_C_DECLS / HEDLEY_END_C_DECLS, HEDLEY_STATIC_ASSERT, HEDLEY_NULL, HEDLEY_MESSAGE / HEDLEY_WARNING, HEDLEY_REQUIRE, HEDLEY_FLAGS, HEDLEY_EMPTY_BASES, and the deprecated HEDLEY_CLANG_HAS_* aliases, followed by the SIMDE_VERSION defines and the opening marker of the inlined simde/simde-detect-clang.h ...]
[... inlined simde/simde-detect-clang.h: derives SIMDE_DETECT_CLANG_VERSION by probing __has_warning / __has_attribute / __has_builtin features introduced in each clang release (17.0 down to 3.6), since vendors such as Apple redefine __clang_major__ / __clang_minor__, and defines SIMDE_DETECT_CLANG_VERSION_CHECK / SIMDE_DETECT_CLANG_VERSION_NOT; followed by the CC0 opening comment of simde/simde-arch.h (a single compiler-independent interface for architecture detection) ...]
[... inlined simde/simde-arch.h: SIMDE_ARCH_* detection macros and SIMDE_ARCH_*_CHECK(version) helpers for Alpha, AVR, AMD64/x86_64, ARM/AArch64 (plus NEON, SVE, FMA, crypto, QRDMX), Blackfin, CRIS, Convex, Epiphany, FR-V, H8/300, E2K, HP/PA, x86 (plus MMX, SSE through SSE4.2, XOP, AVX, AVX2, FMA, AVX-512 variants, GFNI, PCLMUL, VPCLMULQDQ, F16C, AES), Itanium, M32R, M68K, MicroBlaze, MIPS (plus Loongson MMI, MSA), MN10300, POWER/AltiVec, RISC-V 64, SPARC, SuperH, IBM z, TMS320, WebAssembly (SIMD128, relaxed SIMD), Xtensa, ARM FP16/BF16, and LoongArch (LSX/LASX); followed by the MIT-licensed opening of simde/simde-features.h ...]
[... remainder of the simde/simde-features.h preamble and the first part of the inlined simde/simde-diagnostic.h: SIMDE_DIAGNOSTIC_DISABLE_* macros that silence, per compiler, warnings about uninitialized reads, -Wpsabi ABI notes, missing _mm_empty() calls, deprecated #pragma simd, non-constant aggregate initializers, float equality, extra semicolons, variadic macros, reserved identifiers and macros, -Wpacked, double promotion, VLAs, padding, zero-as-null-pointer constants, old-style casts, cast-function-type, C99/C11 extensions, declaration-after-statement, MSVC Annex K, long long, disabled macro expansion, vector conversions, buggy statement-expression casts, ignored qualifiers, -Wpedantic, unreachable code, maybe-uninitialized, native-alias reserved macros, and E2K deprecation notices ...]
*/ -#if defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") -#else -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS -#endif - -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ - HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ - SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ - SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ - SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ - SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ - SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ - SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ - -#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ -/* :: End simde/simde-diagnostic.h :: */ - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SVML) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) - #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) - #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BITALG) - #define SIMDE_X86_AVX512BITALG_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI) - #define SIMDE_X86_AVX512VBMI_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI2) - #define SIMDE_X86_AVX512VBMI2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VNNI) - #define SIMDE_X86_AVX512VNNI_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) - #define SIMDE_X86_AVX5124VNNIW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512CD) - #define SIMDE_X86_AVX512CD_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512DQ) - #define SIMDE_X86_AVX512DQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VL) - #define SIMDE_X86_AVX512VL_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BW) - #define SIMDE_X86_AVX512BW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && !defined(SIMDE_X86_AVX512FP16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512FP16) - #define SIMDE_X86_AVX512FP16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BF16) - #define SIMDE_X86_AVX512BF16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512F) - #define SIMDE_X86_AVX512F_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_NATIVE -#endif - -#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_FMA) - #define SIMDE_X86_FMA_NATIVE - #endif -#endif -#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX2) - #define SIMDE_X86_AVX2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX) - #define SIMDE_X86_AVX_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_XOP) - #define SIMDE_X86_XOP_NATIVE - #endif -#endif -#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_2) - #define SIMDE_X86_SSE4_2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_1) - #define SIMDE_X86_SSE4_1_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSSE3) - #define SIMDE_X86_SSSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE3) - #define SIMDE_X86_SSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_AES_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AES) - #define SIMDE_X86_AES_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE2) - #define SIMDE_X86_SSE2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE) - #define SIMDE_X86_SSE_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_MMX) - #define SIMDE_X86_MMX_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_GFNI) - #define SIMDE_X86_GFNI_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_PCLMUL) - #define SIMDE_X86_PCLMUL_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) - #define SIMDE_X86_VPCLMULQDQ_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_F16C) - #define SIMDE_X86_F16C_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86) && 
(defined(__INTEL_COMPILER) || (HEDLEY_MSVC_VERSION_CHECK(14, 20, 0) && !defined(__clang__))) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(push) - #pragma warning(disable:4799) -#endif - -#if \ - defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) || defined(SIMDE_X86_SVML_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_1_NATIVE) - #include -#elif defined(SIMDE_X86_SSSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE_NATIVE) - #include -#elif defined(SIMDE_X86_MMX_NATIVE) - #include -#endif - -#if defined(SIMDE_X86_XOP_NATIVE) - #if defined(_MSC_VER) - #include - #else - #include - #endif -#endif - -#if defined(SIMDE_X86_AES_NATIVE) - #include -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(pop) -#endif - -#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) - #define SIMDE_ARM_NEON_A64V8_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) - #define SIMDE_ARM_NEON_A32V8_NATIVE - #endif -#endif -#if defined(__ARM_ACLE) - #include -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) - #define SIMDE_ARM_NEON_A32V7_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include - #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - #include - #endif -#endif - -#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_SVE) - #define SIMDE_ARM_SVE_NATIVE - #include - #endif -#endif - -#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_SIMD128) - #define SIMDE_WASM_SIMD128_NATIVE - #endif -#endif - -#if !defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) && !defined(SIMDE_WASM_RELAXED_SIMD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_RELAXED_SIMD) - #define SIMDE_WASM_RELAXED_SIMD_NATIVE - #endif -#endif -#if defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) - #include -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) - #define SIMDE_POWER_ALTIVEC_P9_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) - #define 
SIMDE_POWER_ALTIVEC_P7_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) - #define SIMDE_POWER_ALTIVEC_P7_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_15_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_14_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_13_NATIVE - #endif -#endif - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - /* AltiVec conflicts with lots of stuff. The bool keyword conflicts - * with the bool keyword in C++ and the bool macro in C99+ (defined - * in stdbool.h). The vector keyword conflicts with std::vector in - * C++ if you are `using std;`. - * - * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` - * instead, but altivec.h will unconditionally define - * `vector`/`bool`/`pixel` so we need to work around that. - * - * Unfortunately this means that if your code uses AltiVec directly - * it may break. If this is the case you'll want to define - * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even - * better, port your code to use the double-underscore versions. */ - #if defined(bool) - #undef bool - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #include - - #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #if defined(vector) - #undef vector - #endif - #if defined(pixel) - #undef pixel - #endif - #if defined(bool) - #undef bool - #endif - #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #include - #endif - - /* Use these intsead of vector/pixel/bool in SIMDe. 
*/ - #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T - #define SIMDE_POWER_ALTIVEC_PIXEL __pixel - #define SIMDE_POWER_ALTIVEC_BOOL __bool - - /* Re-define bool if we're using stdbool.h */ - #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #define bool _Bool - #endif -#endif - -#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) - #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_MSA) - #define SIMDE_MIPS_MSA_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_MSA_NATIVE) - #include -#endif - -/* This is used to determine whether or not to fall back on a vector - * function in an earlier ISA extensions, as well as whether - * we expected any attempts at vectorization to be fruitful or if we - * expect to always be running serial code. - * - * Note that, for some architectures (okay, *one* architecture) there - * can be a split where some types are supported for one vector length - * but others only for a shorter length. Therefore, it is possible to - * provide separate values for float/int/double types. */ - -#if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (512) - #elif defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (256) - #elif defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || \ - defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ - defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (128) - #elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) - #endif - - #if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE - #else - #define SIMDE_NATURAL_VECTOR_SIZE (0) - #endif - #endif - - #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif -#endif - -#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) 
((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) - -/* Native aliases */ -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_FMA_NATIVE) - #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VL_NATIVE) - #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) - #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) - #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BW_NATIVE) - #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) - #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) - #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BF16_NATIVE) - #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) - #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) - #define SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512DQ_NATIVE) - #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512CD_NATIVE) - #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512FP16_NATIVE) - #define SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_GFNI_NATIVE) - #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_PCLMUL_NATIVE) - #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) - #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_F16C_NATIVE) - #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AES_NATIVE) - #define 
SIMDE_X86_AES_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SVML_NATIVE) - #define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_SVE_NATIVE) - #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_WASM_SIMD128_NATIVE) - #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES - #endif -#endif - -/* Are floating point values stored using IEEE 754? Knowing - * this during preprocessing is a bit tricky, mostly because what - * we're curious about is how values are stored and not whether the - * implementation is fully conformant in terms of rounding, NaN - * handling, etc. - * - * For example, if you use -ffast-math or -Ofast on - * GCC or clang IEEE 754 isn't strictly followed, therefore IEEE 754 - * support is not advertised (by defining __STDC_IEC_559__). - * - * However, what we care about is whether it is safe to assume that - * floating point values are stored in IEEE 754 format, in which case - * we can provide faster implementations of some functions. - * - * Luckily every vaguely modern architecture I'm aware of uses IEEE 754- - * so we just assume IEEE 754 for now. There is a test which verifies - * this, if that test fails somewhere please let us know and we'll add - * an exception for that platform. Meanwhile, you can define - * SIMDE_NO_IEEE754_STORAGE. */ -#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) - #define SIMDE_IEEE754_STORAGE -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_FP16) - #define SIMDE_ARM_NEON_FP16 -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_BF16) - #define SIMDE_ARM_NEON_BF16 -#endif - -#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LASX) - #define SIMDE_LOONGARCH_LASX_NATIVE - #endif -#endif - -#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LSX) - #define SIMDE_LOONGARCH_LSX_NATIVE - #endif -#endif - -#if defined(SIMDE_LOONGARCH_LASX_NATIVE) - #include -#endif -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - #include -#endif - -#endif /* !defined(SIMDE_FEATURES_H) */ -/* :: End simde/simde-features.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-math.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -/* Attempt to find math functions. Functions may be in , - * , compiler built-ins/intrinsics, or platform/architecture - * specific headers. In some cases, especially those not built in to - * libm, we may need to define our own implementations. */ - -#if !defined(SIMDE_MATH_H) -#define SIMDE_MATH_H 1 - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#include -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -/* SLEEF support - * https://sleef.org/ - * - * If you include prior to including SIMDe, SIMDe will use - * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to - * including SIMDe to force the issue. - * - * Note that SLEEF does requires linking to libsleef. - * - * By default, SIMDe will use the 1 ULP functions, but if you use - * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is - * only the case for the simde_math_* functions; for code in other - * SIMDe headers which calls SLEEF directly we may use functions with - * greater error if the API we're implementing is less precise (for - * example, SVML guarantees 4 ULP, so we will generally use the 3.5 - * ULP functions from SLEEF). */ -#if !defined(SIMDE_MATH_SLEEF_DISABLE) - #if defined(__SLEEF_H__) - #define SIMDE_MATH_SLEEF_ENABLE - #endif -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ - #include - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) - #if defined(SLEEF_VERSION_MAJOR) - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #endif -#else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(__has_builtin) - #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) - #define SIMDE_MATH_BUILTIN_LIBM(func) (1) -#else - #define SIMDE_MATH_BUILTIN_LIBM(func) (0) -#endif - -#if defined(HUGE_VAL) - /* Looks like or has already been included. */ - - /* The math.h from libc++ (yes, the C header from the C++ standard - * library) will define an isnan function, but not an isnan macro - * like the C standard requires. So we detect the header guards - * macro libc++ uses. 
*/ - #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) - #define SIMDE_MATH_HAVE_MATH_H - #elif defined(__cplusplus) - #define SIMDE_MATH_HAVE_CMATH - #endif -#elif defined(__has_include) - #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() - #define SIMDE_MATH_HAVE_CMATH - #include - #elif __has_include() - #define SIMDE_MATH_HAVE_MATH_H - #include - #elif !defined(SIMDE_MATH_NO_LIBM) - #define SIMDE_MATH_NO_LIBM - #endif -#elif !defined(SIMDE_MATH_NO_LIBM) - #if defined(__cplusplus) && (__cplusplus >= 201103L) - #define SIMDE_MATH_HAVE_CMATH - HEDLEY_DIAGNOSTIC_PUSH - #if defined(HEDLEY_MSVC_VERSION) - /* VS 14 emits this diagnostic about noexcept being used on a - * function, which we can't do anything about. */ - #pragma warning(disable:4996) - #endif - #include - HEDLEY_DIAGNOSTIC_POP - #else - #define SIMDE_MATH_HAVE_MATH_H - #include - #endif -#endif - -#if !defined(SIMDE_MATH_INFINITY) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_INFINITY (__builtin_inf()) - #elif defined(INFINITY) - #define SIMDE_MATH_INFINITY INFINITY - #endif -#endif - -#if !defined(SIMDE_INFINITYF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inff) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_INFINITYF (__builtin_inff()) - #elif defined(INFINITYF) - #define SIMDE_MATH_INFINITYF INFINITYF - #elif defined(SIMDE_MATH_INFINITY) - #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) - #endif -#endif - -#if !defined(SIMDE_MATH_NAN) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nan) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_NAN (__builtin_nan("")) - #elif defined(NAN) - #define SIMDE_MATH_NAN NAN - #endif -#endif - -#if !defined(SIMDE_NANF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_NANF (__builtin_nanf("")) - #elif defined(NANF) - #define SIMDE_MATH_NANF NANF - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) - #endif -#endif - -#if !defined(SIMDE_MATH_PI) - #if defined(M_PI) - #define SIMDE_MATH_PI M_PI - #else - #define SIMDE_MATH_PI 3.14159265358979323846 - #endif -#endif - -#if !defined(SIMDE_MATH_PIF) - #if defined(M_PI) - #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) - #else - #define SIMDE_MATH_PIF 3.14159265358979323846f - #endif -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180) - #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180F) - #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f -#endif - -#if !defined(SIMDE_MATH_180_OVER_PI) - #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 -#endif - -#if !defined(SIMDE_MATH_180_OVER_PIF) - #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f -#endif - -#if 
!defined(SIMDE_MATH_FLT_MIN) - #if defined(__FLT_MIN__) - #define SIMDE_MATH_FLT_MIN __FLT_MIN__ - #else - #if !defined(FLT_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MIN FLT_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_FLT_MAX) - #if defined(__FLT_MAX__) - #define SIMDE_MATH_FLT_MAX __FLT_MAX__ - #else - #if !defined(FLT_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MAX FLT_MAX - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MIN) - #if defined(__DBL_MIN__) - #define SIMDE_MATH_DBL_MIN __DBL_MIN__ - #else - #if !defined(DBL_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MIN DBL_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MAX) - #if defined(__DBL_MAX__) - #define SIMDE_MATH_DBL_MAX __DBL_MAX__ - #else - #if !defined(DBL_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MAX DBL_MAX - #endif -#endif - -/*** Classification macros from C99 ***/ - -#if !defined(simde_math_isinf) - #if SIMDE_MATH_BUILTIN_LIBM(isinf) - #define simde_math_isinf(v) __builtin_isinf(v) - #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isinf(v) isinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinf(v) std::isinf(v) - #endif -#endif - -#if !defined(simde_math_isinff) - #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define simde_math_isinff(v) __builtin_isinff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinff(v) std::isinf(v) - #elif defined(simde_math_isinf) - #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnan) - #if SIMDE_MATH_BUILTIN_LIBM(isnan) - #define simde_math_isnan(v) __builtin_isnan(v) - #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnan(v) isnan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnan(v) std::isnan(v) - #endif -#endif - -#if !defined(simde_math_isnanf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ - #define simde_math_isnanf(v) __builtin_isnanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnanf(v) std::isnan(v) - #elif defined(simde_math_isnan) - #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnormal) - #if SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormal(v) __builtin_isnormal(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormal(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormal(v) std::isnormal(v) - #endif -#endif - -#if !defined(simde_math_isnormalf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) - #define simde_math_isnormalf(v) __builtin_isnormalf(v) - #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormalf(v) __builtin_isnormal(v) - #elif defined(isnormalf) - #define simde_math_isnormalf(v) isnormalf(v) - #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormalf(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormalf(v) std::isnormal(v) - #elif defined(simde_math_isnormal) - #define simde_math_isnormalf(v) 
simde_math_isnormal(v) - #endif -#endif - -#if !defined(simde_math_issubnormalf) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) - #endif -#endif - -#if !defined(simde_math_issubnormal) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormal(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) - #endif -#endif - -#if defined(FP_NAN) - #define SIMDE_MATH_FP_NAN FP_NAN -#else - #define SIMDE_MATH_FP_NAN 0 -#endif -#if defined(FP_INFINITE) - #define SIMDE_MATH_FP_INFINITE FP_INFINITE -#else - #define SIMDE_MATH_FP_INFINITE 1 -#endif -#if defined(FP_ZERO) - #define SIMDE_MATH_FP_ZERO FP_ZERO -#else - #define SIMDE_MATH_FP_ZERO 2 -#endif -#if defined(FP_SUBNORMAL) - #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL -#else - #define SIMDE_MATH_FP_SUBNORMAL 3 -#endif -#if defined(FP_NORMAL) - #define SIMDE_MATH_FP_NORMAL FP_NORMAL -#else - #define SIMDE_MATH_FP_NORMAL 4 -#endif - -static HEDLEY_INLINE -int -simde_math_fpclassifyf(float v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0f) ? SIMDE_MATH_FP_ZERO : - simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -static HEDLEY_INLINE -int -simde_math_fpclassify(double v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0) ? SIMDE_MATH_FP_ZERO : - simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -#define SIMDE_MATH_FP_QNAN 0x01 -#define SIMDE_MATH_FP_PZERO 0x02 -#define SIMDE_MATH_FP_NZERO 0x04 -#define SIMDE_MATH_FP_PINF 0x08 -#define SIMDE_MATH_FP_NINF 0x10 -#define SIMDE_MATH_FP_DENORMAL 0x20 -#define SIMDE_MATH_FP_NEGATIVE 0x40 -#define SIMDE_MATH_FP_SNAN 0x80 - -static HEDLEY_INLINE -uint8_t -simde_math_fpclassf(float v, const int imm8) { - union { - float f; - uint32_t u; - } fu; - fu.f = v; - uint32_t bits = fu.u; - uint8_t NegNum = (bits >> 31) & 1; - uint32_t const ExpMask = 0x3F800000; // [30:23] - uint32_t const MantMask = 0x007FFFFF; // [22:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 22) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -static HEDLEY_INLINE -uint8_t -simde_math_fpclass(double v, const int imm8) { - union { - double d; - uint64_t u; - } du; - du.d = v; - uint64_t bits = du.u; - uint8_t NegNum = (bits >> 63) & 1; - uint64_t const ExpMask = 0x3FF0000000000000; // [62:52] - uint64_t const MantMask = 0x000FFFFFFFFFFFFF; // [51:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 51) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -/*** Manipulation functions ***/ - -#if !defined(simde_math_nextafter) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafter(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafter(x, y) 
nextafter(x, y) - #endif -#endif - -#if !defined(simde_math_nextafterf) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafterf(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafterf(x, y) nextafterf(x, y) - #endif -#endif - -/*** Functions from C99 ***/ - -#if !defined(simde_math_abs) - #if SIMDE_MATH_BUILTIN_LIBM(abs) - #define simde_math_abs(v) __builtin_abs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_abs(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_abs(v) abs(v) - #endif -#endif - -#if !defined(simde_math_labs) - #if SIMDE_MATH_BUILTIN_LIBM(labs) - #define simde_math_labs(v) __builtin_labs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_labs(v) std::labs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_labs(v) labs(v) - #endif -#endif - -#if !defined(simde_math_llabs) - #if SIMDE_MATH_BUILTIN_LIBM(llabs) - #define simde_math_llabs(v) __builtin_llabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_llabs(v) std::llabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_llabs(v) llabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_acos) - #if SIMDE_MATH_BUILTIN_LIBM(acos) - #define simde_math_acos(v) __builtin_acos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acos(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acos(v) acos(v) - #endif -#endif - -#if !defined(simde_math_acosf) - #if SIMDE_MATH_BUILTIN_LIBM(acosf) - #define simde_math_acosf(v) __builtin_acosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosf(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosf(v) acosf(v) - #endif -#endif - -#if !defined(simde_math_acosh) - #if SIMDE_MATH_BUILTIN_LIBM(acosh) - #define simde_math_acosh(v) __builtin_acosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosh(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosh(v) acosh(v) - #endif -#endif - -#if !defined(simde_math_acoshf) - #if SIMDE_MATH_BUILTIN_LIBM(acoshf) - #define simde_math_acoshf(v) __builtin_acoshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acoshf(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acoshf(v) acoshf(v) - #endif -#endif - -#if !defined(simde_math_asin) - #if SIMDE_MATH_BUILTIN_LIBM(asin) - #define simde_math_asin(v) __builtin_asin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asin(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asin(v) asin(v) - #endif -#endif - -#if !defined(simde_math_asinf) - #if SIMDE_MATH_BUILTIN_LIBM(asinf) - #define simde_math_asinf(v) __builtin_asinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinf(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinf(v) asinf(v) - #endif -#endif - -#if 
[Deleted vendored code, condensed: remainder of the embedded simde-math.h.
This hunk defined per-function scalar math dispatch macros (simde_math_asinh through simde_math_truncf, double and float variants) that prefer __builtin_* when the compiler provides it, then the std:: names from <cmath>, then the C library names from <math.h>; comparison helpers such as simde_math_isunordered that avoid raising invalid-operation exceptions; scalar approximations of the normal CDF (A&S formula 7.1.26), its inverse (P. J. Acklam's piecewise rational approximation, per the URL cited in the code), and the inverse error functions erfinv/erfcinv; radian/degree conversion helpers; and branch-free saturated add/subtract helpers for 8-, 16-, 32- and 64-bit signed and unsigned integers, using AArch64 scalar saturating intrinsics (vqaddb_s8 and friends) when SIMDE_ARM_NEON_A64V8_NATIVE is defined. The hunk closes the SIMDE_MATH_H include guard and is followed by the MIT license header that opens the embedded simde-constify.h.]
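For reference, the inverse-error-function helper summarized above implements a single closed-form approximation; restated from the removed code, with a = 0.14829094707965850830078125 (the tuned constant discussed in its comment):

\operatorname{erf}^{-1}(x) \approx \operatorname{sgn}(x)\,\sqrt{\sqrt{\left(\frac{2}{\pi a}+\frac{\ln(1-x^{2})}{2}\right)^{2}-\frac{\ln(1-x^{2})}{a}}-\left(\frac{2}{\pi a}+\frac{\ln(1-x^{2})}{2}\right)}

The removed erfcinv helpers reused erfinv(1 - x) on the central range and switched to rational fits in 1/sqrt(-ln x) for small arguments.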
[Deleted vendored code, condensed: remainder of the embedded simde-constify.h.
After the MIT license text and copyright notice (2020 Evan Nemerson), a comment explains the "constify" technique: a call that formally requires an Integer Constant Expression argument is wrapped in a do/while containing a switch with one case per admissible immediate, so any compile-time-constant value generates the same code as a literal would. The header then defined SIMDE_CONSTIFY_{2,4,8,16,32,64}_ (which assign the call's result) and the corresponding _NO_RESULT_ variants, each enumerating cases 0 through N-1 with a default fallback, before closing its include guard. It is followed by the CC0 notice and API-overview comment that open the embedded simde-align.h.]
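The constify pattern is easiest to see in a minimal, self-contained sketch (illustrative names only; the removed macros instead take the callee and its leading arguments as macro parameters and append the immediate as a literal trailing argument):

#include <cstdint>

// Stand-in for an operation that must receive its shift amount as an
// integer constant expression (here enforced via a template parameter).
template <int Imm>
static int32_t shift_right_imm(int32_t v) { return v >> Imm; }

// The pattern: branch once on the runtime-typed value and re-emit the call
// with a genuine constant in every case, plus a default fallback.
#define DEMO_CONSTIFY_4_(result, default_case, imm, value)        \
  do {                                                            \
    switch (imm) {                                                \
      case 0: result = shift_right_imm<0>(value); break;          \
      case 1: result = shift_right_imm<1>(value); break;          \
      case 2: result = shift_right_imm<2>(value); break;          \
      case 3: result = shift_right_imm<3>(value); break;          \
      default: result = (default_case); break;                    \
    }                                                             \
  } while (0)

int32_t shift_right(int32_t value, int imm) {
  int32_t r;
  DEMO_CONSTIFY_4_(r, value, imm, value);
  return r;  // when imm is a compile-time constant, the switch folds away
}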
[Deleted vendored code, condensed: opening portion of the embedded simde-align.h.
This hunk detected size_t- and intptr_t-like types for non-hosted compilers, defined SIMDE_ALIGN_OF (mapping to _Alignof, alignof, __alignof__, __ALIGNOF__, or __alignof depending on the toolchain), capped requested alignments through SIMDE_ALIGN_PLATFORM_MAXIMUM / SIMDE_ALIGN_CAP to work around limits in older MSVC and XL C/C++, and defined SIMDE_ALIGN_TO plus the fixed SIMDE_ALIGN_TO_8/16/32/64 variants (mapping to __attribute__((__aligned__)), _Alignas, alignas, or MSVC's __declspec(align)). The excerpt ends inside the documentation of SIMDE_ALIGN_ASSUME_TO, the assume-aligned hint macro.]
We don't - * integrate with NDEBUG in this header, but it may be a good idea to - * put something like this in your code: - * - * #if !defined(NDEBUG) - * #define SIMDE_ALIGN_DEBUG - * #endif - * #include <.../simde-align.h> - */ -#if \ - HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ - HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ - __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ - __assume_aligned(simde_assume_aligned_t_, Alignment); \ - simde_assume_aligned_t_; \ - })) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) -#else - #if defined(__cplusplus) - template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) - #else - HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) - #endif - { - HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); - return ptr; - } - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) - #else - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) - #endif -#endif - -#if !defined(SIMDE_ALIGN_DEBUG) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) -#else - #include - #if defined(__cplusplus) - template - static HEDLEY_ALWAYS_INLINE - T* - simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #else - static HEDLEY_ALWAYS_INLINE - void* - simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #endif - { - if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { - fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", - file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), - HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), - HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); - } - - return ptr; - } - - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) - #else - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) - #endif -#endif - -/* SIMDE_ALIGN_LIKE(Type) - * SIMDE_ALIGN_LIKE_#(Type) - * - * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros - * except instead of an integer they take a type; basically, it's just - * a more convenient way to do something like: - * - * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - * - * The versions with a numeric suffix will fall back 
on using a numeric - * value in the event we can't use SIMDE_ALIGN_OF(Type). This is - * mainly for MSVC, where __declspec(align()) can't handle anything - * other than hard-coded numeric values. - */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) - #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) -#else - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 -#endif - -/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) - * - * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a - * type instead of a numeric value. */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) - #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) -#endif - -/* SIMDE_ALIGN_CAST(Type, Pointer) - * - * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try - * to silence warnings that some compilers may produce if you try - * to assign to a type with increased alignment requirements. - * - * Note that it does *not* actually attempt to tell the compiler that - * the pointer is aligned like the destination should be; that's the - * job of the next macro. This macro is necessary for stupid APIs - * like _mm_loadu_si128 where the input is a __m128i* but the function - * is specifically for data which isn't necessarily aligned to - * _Alignof(__m128i). - */ -#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ - Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_r_; \ - })) -#else - #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) -#endif - -/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) - * - * This is sort of like a combination of a reinterpret_cast and a - * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell - * the compiler that the pointer is aligned like the specified type - * and casts the pointer to the specified type while suppressing any - * warnings from the compiler about casting to a type with greater - * alignment requirements. - */ -#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) - -#endif /* !defined(SIMDE_ALIGN_H) */ -/* :: End simde/simde-align.h :: */ - -/* In some situations, SIMDe has to make large performance sacrifices - * for small increases in how faithfully it reproduces an API, but - * only a relatively small number of users will actually need the API - * to be completely accurate. The SIMDE_FAST_* options can be used to - * disable these trade-offs. - * - * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or - * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to - * enable some optimizations. Using -ffast-math and/or - * -ffinite-math-only will also enable the relevant options. If you - * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) - #define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) - #if defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_NANS - #elif defined(__FINITE_MATH_ONLY__) - #if __FINITE_MATH_ONLY__ - #define SIMDE_FAST_NANS - #endif - #endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_CONVERSION_RANGE -#endif - -/* Due to differences across platforms, sometimes it can be much - * faster for us to allow spurious floating point exceptions, - * or to no generate them when we should. 
*/ -#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_EXCEPTIONS -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) - #if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ - (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) - #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") - #else - #define SIMDE_REQUIRE_CONSTANT(arg) - #endif -#else - #define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) - /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which - * starts with a double-underscore. This is a system header so we have no - * control over it, but since it's a macro it will emit a diagnostic which - * prevents compilation with -Werror. */ - #if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ - _Static_assert(expr, message); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) - #endif -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) - #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#endif - -/* Statement exprs */ -#if \ - HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ - HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) -#endif - -/* This is just a convenience macro to make it easy to call a single - * function with a specific diagnostic disabled. 
*/ -#if defined(SIMDE_STATEMENT_EXPR_) - #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ - SIMDE_STATEMENT_EXPR_(({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - diagnostic \ - (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#endif - -#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) - #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") -#endif - -#if \ - (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) -# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -# define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -# if \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SCALAR -# define SIMDE_VECTOR_SUBSCRIPT -# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -# define SIMDE_VECTOR_SUBSCRIPT -# elif \ - HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# elif HEDLEY_HAS_ATTRIBUTE(vector_size) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SUBSCRIPT -# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) -# define SIMDE_VECTOR_SCALAR -# endif -# endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) - HEDLEY_DIAGNOSTIC_PUSH - /* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -# endif -# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif - -# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#else -# define SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_SAFELEN(l) -# define SIMDE_VECTORIZE_REDUCTION(r) -# define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if defined(SIMDE_NO_INLINE) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#elif defined(SIMDE_CONSTRAINED_COMPILATION) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static -#else -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if \ - HEDLEY_HAS_ATTRIBUTE(unused) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ - -#if defined(_MSC_VER) -# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -# define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -# define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -# define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -# define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# elif defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__s390x__) || defined(__zarch__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. 
*/ -# elif defined(_WIN32) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(sun) || defined(__sun) /* Solaris */ -# include -# if defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__APPLE__) -# include -# if defined(__LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -# include -# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -# include -# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# endif -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) - #define simde_bswap64(v) _byteswap_uint64(v) -#else - SIMDE_FUNCTION_ATTRIBUTES - uint64_t - simde_bswap64(uint64_t v) { - return - ((v & (((uint64_t) 0xff) << 56)) >> 56) | - ((v & (((uint64_t) 0xff) << 48)) >> 40) | - ((v & (((uint64_t) 0xff) << 40)) >> 24) | - ((v & (((uint64_t) 0xff) << 32)) >> 8) | - ((v & (((uint64_t) 0xff) << 24)) << 8) | - ((v & (((uint64_t) 0xff) << 16)) << 24) | - ((v & (((uint64_t) 0xff) << 8)) << 40) | - ((v & (((uint64_t) 0xff) )) << 56); - } -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -# error Unknown byte order; please file a bug -#else -# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -# define simde_endian_bswap64_be(value) simde_bswap64(value) -# define simde_endian_bswap64_le(value) (value) -# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -# define simde_endian_bswap64_be(value) (value) -# define simde_endian_bswap64_le(value) simde_bswap64(value) -# endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. 
*/ - -#if !defined(SIMDE_FLOAT32_TYPE) -# define SIMDE_FLOAT32_TYPE float -# define SIMDE_FLOAT32_C(value) value##f -#else -# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -# define SIMDE_FLOAT64_TYPE double -# define SIMDE_FLOAT64_C(value) value -#else -# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if defined(SIMDE_POLY8_TYPE) -# undef SIMDE_POLY8_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY8_TYPE poly8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value)) -#else -# define SIMDE_POLY8_TYPE uint8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value)) -#endif -typedef SIMDE_POLY8_TYPE simde_poly8; - -#if defined(SIMDE_POLY16_TYPE) -# undef SIMDE_POLY16_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY16_TYPE poly16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value)) -#else -# define SIMDE_POLY16_TYPE uint16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value)) -#endif -typedef SIMDE_POLY16_TYPE simde_poly16; - -#if defined(SIMDE_POLY64_TYPE) -# undef SIMDE_POLY64_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_POLY64_TYPE poly64_t -# define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull)) -#else -# define SIMDE_POLY64_TYPE uint64_t -# define SIMDE_POLY64_C(value) value ## ull -#endif -typedef SIMDE_POLY64_TYPE simde_poly64; - -#if defined(SIMDE_POLY128_TYPE) -# undef SIMDE_POLY128_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) -# define SIMDE_POLY128_TYPE poly128_t -# define SIMDE_POLY128_C(value) value -#elif defined(__SIZEOF_INT128__) -# define SIMDE_POLY128_TYPE __int128 -# define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value)) -#else -# define SIMDE_POLY128_TYPE uint64_t -# define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1 -#endif -typedef SIMDE_POLY128_TYPE simde_poly128; - -#if defined(__cplusplus) - typedef bool simde_bool; -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - typedef _Bool simde_bool; -#elif defined(bool) - typedef bool simde_bool; -#else - #include - typedef bool simde_bool; -#endif - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -# define SIMDE_CONVERT_FTOI(T,v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) \ - HEDLEY_DIAGNOSTIC_POP -#else -# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) -#else - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -# define 
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -# define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -# define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -# if \ - defined(HEDLEY_PGI_VERSION) || \ - defined(HEDLEY_MSVC_VERSION) -# define SIMDE_STDC_HOSTED 1 -# else -# define SIMDE_STDC_HOSTED 0 -# endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) - #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) - #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) - #endif -#endif -#if !defined(simde_memset) - #if HEDLEY_HAS_BUILTIN(__builtin_memset) - #define simde_memset(s, c, n) __builtin_memset(s, c, n) - #endif -#endif -#if !defined(simde_memcmp) - #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) - #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) - #endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) - #if !defined(SIMDE_NO_STRING_H) - #if defined(__has_include) - #if !__has_include() - #define SIMDE_NO_STRING_H - #endif - #elif (SIMDE_STDC_HOSTED == 0) - #define SIMDE_NO_STRING_H - #endif - #endif - - #if !defined(SIMDE_NO_STRING_H) - #include - #if !defined(simde_memcpy) - #define simde_memcpy(dest, src, n) memcpy(dest, src, n) - #endif - #if !defined(simde_memset) - #define simde_memset(s, c, n) memset(s, c, n) - #endif - #if !defined(simde_memcmp) - #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) - #endif - #else - /* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. 
*/ - #if !defined(simde_memcpy) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memcpy_(void* dest, const void* src, size_t len) { - char* dest_ = HEDLEY_STATIC_CAST(char*, dest); - char* src_ = HEDLEY_STATIC_CAST(const char*, src); - for (size_t i = 0 ; i < len ; i++) { - dest_[i] = src_[i]; - } - } - #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) - #endif - - #if !defined(simde_memset) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memset_(void* s, int c, size_t len) { - char* s_ = HEDLEY_STATIC_CAST(char*, s); - char c_ = HEDLEY_STATIC_CAST(char, c); - for (size_t i = 0 ; i < len ; i++) { - s_[i] = c_[i]; - } - } - #define simde_memset(s, c, n) simde_memset_(s, c, n) - #endif - - #if !defined(simde_memcmp) - SIMDE_FUCTION_ATTRIBUTES - int - simde_memcmp_(const void *s1, const void *s2, size_t n) { - unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); - unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); - for (size_t i = 0 ; i < len ; i++) { - if (s1_[i] != s2_[i]) { - return (int) (s1_[i] - s2_[i]); - } - } - return 0; - } - #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) - #endif - #endif -#endif - -/*** Functions that quiet a signaling NaN ***/ - -static HEDLEY_INLINE -double -simde_math_quiet(double x) { - uint64_t tmp, mask; - if (!simde_math_isnan(x)) { - return x; - } - simde_memcpy(&tmp, &x, 8); - mask = 0x7ff80000; - mask <<= 32; - tmp |= mask; - simde_memcpy(&x, &tmp, 8); - return x; -} - -static HEDLEY_INLINE -float -simde_math_quietf(float x) { - uint32_t tmp; - if (!simde_math_isnanf(x)) { - return x; - } - simde_memcpy(&tmp, &x, 4); - tmp |= 0x7fc00000lu; - simde_memcpy(&x, &tmp, 4); - return x; -} - -#if defined(FE_ALL_EXCEPT) - #define SIMDE_HAVE_FENV_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_FENV_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_FENV_H -#endif - -#if defined(EXIT_FAILURE) - #define SIMDE_HAVE_STDLIB_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_STDLIB_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_STDLIB_H -#endif - -#if defined(__has_include) -# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -# include -# elif __has_include() -# include -# endif -# if __has_include() -# include -# endif -#elif SIMDE_STDC_HOSTED == 1 -# include -# include -#endif - -#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ - static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ - T_To \ - Name (T_From value) { \ - T_To r; \ - simde_memcpy(&r, &value, sizeof(r)); \ - return r; \ - } - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/check.h :: */ -/* Check (assertions) - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -#if !defined(_WIN32) -# define SIMDE_SIZE_MODIFIER "z" -# define SIMDE_CHAR_MODIFIER "hh" -# define SIMDE_SHORT_MODIFIER "h" -#else -# if defined(_M_X64) || defined(__amd64__) -# define SIMDE_SIZE_MODIFIER "I64" -# else -# define SIMDE_SIZE_MODIFIER "" -# endif -# define SIMDE_CHAR_MODIFIER "" -# define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) -# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ -# define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -# if defined(__has_include) -# if __has_include() -# include -# endif -# elif defined(SIMDE_STDC_HOSTED) -# if SIMDE_STDC_HOSTED == 1 -# include -# endif -# elif defined(__STDC_HOSTED__) -# if __STDC_HOSTETD__ == 1 -# include -# endif -# endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/debug-trap.h :: */ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define simde_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define simde_trap() __debugbreak() -# endif -#endif -#if !defined(simde_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define simde_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define simde_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define simde_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void simde_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define simde_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define simde_trap() raise(SIGTRAP) -# else -# define simde_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -# define simde_dbg_assert(expr) do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -# define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ -/* :: End simde/debug-trap.h :: */ - - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -# if defined(EOF) -# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) -# else -# define simde_errorf(format, ...) (simde_trap()) -# endif - HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -# if defined(SIMDE_CHECK_FAIL_DEFINED) -# define simde_assert(expr) -# else -# if defined(HEDLEY_ASSUME) -# define simde_assert(expr) HEDLEY_ASSUME(expr) -# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) -# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) -# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) -# define simde_assert(expr) __assume(expr) -# else -# define simde_assert(expr) -# endif -# endif -# define simde_assert_true(expr) simde_assert(expr) -# define simde_assert_false(expr) simde_assert(!(expr)) -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) -# define simde_assert_double_equal(a, b, precision) -# define simde_assert_string_equal(a, b) -# define simde_assert_string_not_equal(a, b) -# define simde_assert_memory_equal(size, a, b) -# define simde_assert_memory_not_equal(size, a, b) -#else -# define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ - -(simde_tmp_a_ - simde_tmp_b_) : \ - (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# include -# define simde_assert_string_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ - simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_string_not_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ - simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ - simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) \ - simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) \ - simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) \ - simde_assert_type(float, "f", a, op, b) 
-#define simde_assert_double(a, op, b) \ - simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void*, "p", a, op, b) - -#define simde_assert_int8(a, op, b) \ - simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) \ - simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) \ - simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) \ - simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ -/* :: End simde/check.h :: */ - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether the operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * we use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long lonsg are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long. 
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. 
- * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - #include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; - #endif - - #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; - #endif - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; - #endif - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; - #endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde__m64_from_private(simde__m64_private v) { - simde__m64 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64_private -simde__m64_to_private(simde__m64 v) { - simde__m64_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ - SIMDE_FUNCTION_ATTRIBUTES \ - simde__##simde_type \ - simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ - simde__##simde_type##_private r_; \ - r_.isax##_##fragment = value; \ - return simde__##simde_type##_from_private(r_); \ - } \ - \ - SIMDE_FUNCTION_ATTRIBUTES \ - source_type \ - simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ - simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ - return r_.isax##_##fragment; \ - } - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) -#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) -# define _m_paddb(a, b) simde_m_paddb(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_add_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) -# define _m_paddw(a, b) simde_mm_add_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) -# define _m_paddd(a, b) simde_mm_add_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { - r_.i8[i] = INT8_MAX; - } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { - r_.i8[i] = INT8_MIN; - } else { - r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) -# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, 
b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) -# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_and_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - r_.i64[0] = a_.i64[0] & b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -# define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = 
simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) -# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) -# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) -# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) -# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) -# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) -# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtm64_si64 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtm64_si64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i64[0]; - #endif - #endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -# define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi32_si64 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[2] = { a, 0 }; - r_.neon_i32 = vld1_s32(av); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -# define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi64_m64 (int64_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtsi64_m64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); - #else - r_.i64[0] = a; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi64_si32 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_empty (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); - #else - /* noop */ - #endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_empty() simde_mm_empty() -# define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_or_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - r_.i64[0] = a_.i64[0] | b_.i64[0]; 
- #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -# define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to 
UINT8_MAX */ - const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); - - /* Elements which are within the acceptable range */ - const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); - const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); - - /* Final values as 16-bit integers */ - const int16x8_t values = vorrq_s16(le_max, gt_max); - - r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else if (a_.i16[i] < 0) { - r_.u8[i] = 0; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] > UINT8_MAX) { - r_.u8[i + 4] = UINT8_MAX; - } else if (b_.i16[i] < 0) { - r_.u8[i + 4] = 0; - } else { - r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) -# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i8 = vld1_s8(v); - #else - r_.i8[0] = e0; - r_.i8[1] = e1; - r_.i8[2] = e2; - r_.i8[3] = e3; - r_.i8[4] = e4; - r_.i8[5] = e5; - r_.i8[6] = e6; - r_.i8[7] = e7; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m64_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi8( - HEDLEY_STATIC_CAST(int8_t, e7), - HEDLEY_STATIC_CAST(int8_t, e6), - HEDLEY_STATIC_CAST(int8_t, e5), - HEDLEY_STATIC_CAST(int8_t, e4), - HEDLEY_STATIC_CAST(int8_t, e3), - HEDLEY_STATIC_CAST(int8_t, e2), - HEDLEY_STATIC_CAST(int8_t, e1), - HEDLEY_STATIC_CAST(int8_t, e0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u8 = vld1_u8(v); - #else - r_.u8[0] = e0; - r_.u8[1] = e1; - r_.u8[2] = e2; - r_.u8[3] = e3; - r_.u8[4] = e4; - r_.u8[5] = e5; - r_.u8[6] = e6; - r_.u8[7] = e7; - #endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi16(e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; - r_.neon_i16 = vld1_s16(v); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - #endif - - return 
simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi16( - HEDLEY_STATIC_CAST(int16_t, e3), - HEDLEY_STATIC_CAST(int16_t, e2), - HEDLEY_STATIC_CAST(int16_t, e1), - HEDLEY_STATIC_CAST(int16_t, e0) - ); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; - r_.neon_u16 = vld1_u16(v); -#else - r_.u16[0] = e0; - r_.u16[1] = e1; - r_.u16[2] = e2; - r_.u16[3] = e3; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32( - HEDLEY_STATIC_CAST(int32_t, e1), - HEDLEY_STATIC_CAST(int32_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; - r_.neon_u32 = vld1_u32(v); -#else - r_.u32[0] = e0; - r_.u32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi32 (int32_t e1, int32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32(e1, e0); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; - r_.neon_i32 = vld1_s32(v); -#else - r_.i32[0] = e0; - r_.i32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pi64 (int64_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; - r_.neon_i64 = vld1_s64(v); -#else - r_.i64[0] = e0; -#endif - - return simde__m64_from_private(r_); -} - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; - r_.neon_f32 = vld1_f32(v); -#else - r_.f32[0] = e0; - r_.f32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi8 (int8_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi8(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i8 = vmov_n_s8(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi8(a, a, a, a, a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi16 (int16_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi16(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i16 = vmov_n_s16(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi16(a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi32 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi32(a); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i32 = vmov_n_s32(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi16(e3, e2, e1, e0); - #else - return simde_mm_set_pi16(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi32 (int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi32(e1, e0); - #else - return simde_mm_set_pi32(e0, e1); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setzero_si64 (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setzero_si64(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_u32 = vmov_n_u32(0); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(0, 0); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_si64() simde_mm_setzero_si64() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_load_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_loadu_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(mem_addr, &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_setone_si64 (void) { - return simde_mm_set1_pi32(~INT32_C(0)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) 
- return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) -# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) -# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psllh_s(a_.mmi_i16, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) -# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi32(a, count); - #else - simde__m64_private r_; - 
simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) -# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_slli_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); - #else - r_.u64[0] = a_.u64[0] << count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) -# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 << count_.i64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] << count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) -# define _m_psllq(a, count) simde_mm_sll_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) - return simde_mm_setzero_si64(); - - r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { - r_.u16[i] = a_.u16[i] >> count_.u64[0]; - } - 
#endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) -# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { - r_.u32[i] = a_.u32[i] >> count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) -# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) -# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) -# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_si64(a, count); 
- #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> count; - #else - r_.u64[0] = a_.u64[0] >> count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) -# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 >> count_.u64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] >> count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) -# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrah_s(a_.mmi_i16, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) -# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psraw_s(a_.mmi_i32, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) 
-# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) -# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int32_t cnt = (count_.u64[0] > 31) ? 31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) -# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) -# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); - #elif 
defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) -# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) -# define _m_psubd(a, b) simde_mm_sub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { - r_.i8[i] = INT8_MIN; - } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) -# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const int32_t x = a_.u8[i] - b_.u8[i]; - if (x < 0) { - r_.u8[i] = 0; - } else if (x > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) -# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { - r_.i16[i] = SHRT_MIN; - } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) -# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - const int x = a_.u16[i] - b_.u16[i]; - if (x < 0) { - r_.u16[i] = 0; - } else if (x > UINT16_MAX) { - r_.u16[i] = UINT16_MAX; - } else { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) -# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); - #else - r_.i8[0] = a_.i8[4]; - r_.i8[1] = b_.i8[4]; - r_.i8[2] = a_.i8[5]; - r_.i8[3] = b_.i8[5]; - r_.i8[4] = a_.i8[6]; - r_.i8[5] = b_.i8[6]; - r_.i8[6] = a_.i8[7]; - r_.i8[7] = b_.i8[7]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) -# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); - #else - r_.i16[0] = a_.i16[2]; - r_.i16[1] = b_.i16[2]; - r_.i16[2] = a_.i16[3]; - r_.i16[3] = b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) -# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[1]; - r_.i32[1] = b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) -# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); - #else - r_.i8[0] = a_.i8[0]; - r_.i8[1] = b_.i8[0]; - r_.i8[2] = a_.i8[1]; - r_.i8[3] = b_.i8[1]; - r_.i8[4] = a_.i8[2]; - r_.i8[5] = b_.i8[2]; - r_.i8[6] = a_.i8[3]; - r_.i8[7] = b_.i8[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) -# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = 
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); - #else - r_.i16[0] = a_.i16[0]; - r_.i16[1] = b_.i16[0]; - r_.i16[2] = a_.i16[1]; - r_.i16[3] = b_.i16[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) -# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); - #else - r_.i32[0] = a_.i32[0]; - r_.i32[1] = b_.i32[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) -# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_xor_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - r_.u64[0] = a_.u64[0] ^ b_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) -# define _m_pxor(a, b) simde_mm_xor_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_m_to_int (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _m_to_int(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _m_to_int(a) simde_m_to_int(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_MMX_H) */ -/* :: End simde/x86/mmx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-f16.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do 
so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if !defined(SIMDE_FLOAT16_H) -#define SIMDE_FLOAT16_H - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* Portable version which should work on pretty much any compiler. - * Obviously you can't rely on compiler support for things like - * conversion to/from 32-bit floats, so make sure you always use the - * functions and macros in this file! - * - * The portable implementations are (heavily) based on CC0 code by - * Fabian Giesen: (see also - * ). - * I have basically just modified it to get rid of some UB (lots of - * aliasing, right shifting a negative value), use fixed-width types, - * and work in C. */ -#define SIMDE_FLOAT16_API_PORTABLE 1 -/* _Float16, per C standard (TS 18661-3; - * ). */ -#define SIMDE_FLOAT16_API_FLOAT16 2 -/* clang >= 6.0 supports __fp16 as an interchange format on all - * targets, but only allows you to use them for arguments and return - * values on targets which have defined an ABI. We get around the - * restriction by wrapping the __fp16 in a struct, but we can't do - * that on Arm since it would break compatibility with the NEON F16 - * functions. */ -#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 -/* This is basically __fp16 as specified by Arm, where arugments and - * return values are raw __fp16 values not structs. */ -#define SIMDE_FLOAT16_API_FP16 4 - -/* Choosing an implementation. This is a bit rough, but I don't have - * any ideas on how to improve it. If you do, patches are definitely - * welcome. */ -#if !defined(SIMDE_FLOAT16_API) - #if defined(__ARM_FP16_FORMAT_IEEE) && (defined(SIMDE_ARM_NEON_FP16) || defined(__ARM_FP16_ARGS)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 - #elif !defined(__EMSCRIPTEN__) && !(defined(__clang__) && defined(SIMDE_ARCH_POWER)) && \ - !(defined(HEDLEY_MSVC_VERSION) && defined(__clang__)) && \ - !(defined(SIMDE_ARCH_MIPS) && defined(__clang__)) && \ - !(defined(__clang__) && defined(SIMDE_ARCH_RISCV64)) && ( \ - defined(SIMDE_X86_AVX512FP16_NATIVE) || \ - (defined(SIMDE_ARCH_X86_SSE2) && HEDLEY_GCC_VERSION_CHECK(12,0,0)) || \ - (defined(SIMDE_ARCH_AARCH64) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !defined(__cplusplus)) || \ - ((defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0)) || \ - (!(defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(6,0,0))) - /* We haven't found a better way to detect this. 
It seems like defining - * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then - * checking for defined(FLT16_MAX) should work, but both gcc and - * clang will define the constants even if _Float16 is not - * supported. Ideas welcome. */ - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 - #elif defined(__FLT16_MIN__) && \ - (defined(__clang__) && \ - (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) \ - && !defined(SIMDE_ARCH_RISCV64)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI - #else - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE - #endif -#endif - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 - typedef _Float16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #if !defined(__cplusplus) - #define SIMDE_FLOAT16_C(value) value##f16 - #else - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(_Float16, (value)) - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - typedef struct { __fp16 value; } simde_float16; - #if defined(SIMDE_STATEMENT_EXPR_) && !defined(SIMDE_TESTS_H) - #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) - #else - #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) - #define SIMDE_FLOAT16_IS_SCALAR 1 - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 - typedef __fp16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - typedef struct { uint16_t value; } simde_float16; -#else - #error No 16-bit floating point API. -#endif - -#if \ - defined(SIMDE_VECTOR_OPS) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) - #define SIMDE_FLOAT16_VECTOR -#endif - -/* Reinterpret -- you *generally* shouldn't need these, they're really - * intended for internal use. However, on x86 half-precision floats - * get stuffed into a __m128i/__m256i, so it may be useful. 
*/ - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - #define SIMDE_NANHF simde_uint16_as_float16(0x7E00) // a quiet Not-a-Number - #define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) - #define SIMDE_NINFINITYHF simde_uint16_as_float16(0xFC00) -#else - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF SIMDE_FLOAT16_C(__builtin_nanf16("")) - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_FLOAT16_C(SIMDE_MATH_NAN) - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(__builtin_inf16()) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-__builtin_inf16()) - #else - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-SIMDE_MATH_INFINITY) - #endif - #else - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF __builtin_nanf16("") - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_MATH_NAN - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF __builtin_inf16() - #define SIMDE_NINFINITYHF -(__builtin_inf16()) - #else - #define SIMDE_INFINITYHF HEDLEY_STATIC_CAST(simde_float16, SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF HEDLEY_STATIC_CAST(simde_float16, -SIMDE_MATH_INFINITY) - #endif - #endif -#endif - -/* Conversion -- convert between single-precision and half-precision - * floats. */ -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float16 -simde_float16_from_float32 (simde_float32 value) { - simde_float16 res; - - #if \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) - res = HEDLEY_STATIC_CAST(simde_float16, value); - #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) - res.value = HEDLEY_STATIC_CAST(__fp16, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint32_t f32u = simde_float32_as_uint32(value); - static const uint32_t f32u_infty = UINT32_C(255) << 23; - static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; - static const uint32_t denorm_magic = - ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; - uint16_t f16u; - - uint32_t sign = f32u & (UINT32_C(1) << 31); - f32u ^= sign; - - /* NOTE all the integer compares in this function cast the operands - * to signed values to help compilers vectorize to SSE2, which lacks - * unsigned comparison instructions. This is fine since all - * operands are below 0x80000000 (we clear the sign bit). */ - - if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ - f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ - } else { /* (De)normalized number or zero */ - if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ - /* use a magic value to align our 10 mantissa bits at the bottom of - * the float. as long as FP addition is round-to-nearest-even this - * just works. */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); - - /* and one integer subtract of the bias later, we have our final float! 
*/ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); - } else { - uint32_t mant_odd = (f32u >> 13) & 1; - - /* update exponent, rounding bias part 1 */ - f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); - /* rounding bias part 2 */ - f32u += mant_odd; - /* take the bits! */ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); - } - } - - f16u |= sign >> 16; - res = simde_uint16_as_float16(f16u); - #endif - - return res; -} - -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float32 -simde_float16_to_float32 (simde_float16 value) { - simde_float32 res; - - #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) - res = HEDLEY_STATIC_CAST(simde_float32, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint16_t half = simde_float16_as_uint16(value); - const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); - const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ - uint32_t f32u; - - f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ - uint32_t exp = shifted_exp & f32u; /* just the exponent */ - f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ - - /* handle exponent special cases */ - if (exp == shifted_exp) /* Inf/NaN? */ - f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ - else if (exp == 0) { /* Zero/Denormal? */ - f32u += (1) << 23; /* extra exp adjust */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ - } - - f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ - res = simde_uint32_as_float32(f32u); - #endif - - return res; -} - -#ifdef SIMDE_FLOAT16_C - #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) -#else - #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) -#endif - -#if !defined(simde_isinfhf) && defined(simde_math_isinff) - #define simde_isinfhf(a) simde_math_isinff(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnanhf) && defined(simde_math_isnanf) - #define simde_isnanhf(a) simde_math_isnanf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnormalhf) && defined(simde_math_isnormalf) - #define simde_isnormalhf(a) simde_math_isnormalf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_issubnormalhf) && defined(simde_math_issubnormalf) - #define simde_issubnormalhf(a) simde_math_issubnormalf(simde_float16_to_float32(a)) -#endif - -#define simde_fpclassifyhf(a) simde_math_fpclassifyf(simde_float16_to_float32(a)) - -static HEDLEY_INLINE -uint8_t -simde_fpclasshf(simde_float16 v, const int imm8) { - uint16_t bits = simde_float16_as_uint16(v); - uint8_t negative = (bits >> 15) & 1; - uint16_t const ExpMask = 0x7C00; // [14:10] - uint16_t const MantMask = 0x03FF; // [9:0] - uint8_t exponent_all_ones = ((bits & ExpMask) == ExpMask); - uint8_t exponent_all_zeros = ((bits & ExpMask) == 0); - uint8_t mantissa_all_zeros = ((bits & MantMask) == 0); - uint8_t zero = exponent_all_zeros & mantissa_all_zeros; - uint8_t signaling_bit = (bits >> 9) & 1; - - uint8_t result = 0; - uint8_t snan = exponent_all_ones & (!mantissa_all_zeros) & (!signaling_bit); - uint8_t qnan = exponent_all_ones & (!mantissa_all_zeros) & signaling_bit; - uint8_t positive_zero = (!negative) & zero; - uint8_t negative_zero = negative & zero; - uint8_t positive_infinity = (!negative) & exponent_all_ones & mantissa_all_zeros; - uint8_t negative_infinity = negative & exponent_all_ones & mantissa_all_zeros; - uint8_t 
denormal = exponent_all_zeros & (!mantissa_all_zeros); - uint8_t finite_negative = negative & (!exponent_all_ones) & (!zero); - result = (((imm8 >> 0) & qnan) | \ - ((imm8 >> 1) & positive_zero) | \ - ((imm8 >> 2) & negative_zero) | \ - ((imm8 >> 3) & positive_infinity) | \ - ((imm8 >> 4) & negative_infinity) | \ - ((imm8 >> 5) & denormal) | \ - ((imm8 >> 6) & finite_negative) | \ - ((imm8 >> 7) & snan)); - return result; -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_FLOAT16_H) */ -/* :: End simde/simde-f16.h :: */ - -#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) - #define NOMINMAX - #include -#endif - -#if defined(__ARM_ACLE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_ALIGN_TO_16 __m128 n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v16i8 lsx_i8; - v8i16 lsx_i16; - v4i32 lsx_i32; - v2i64 lsx_i64; - v16u8 lsx_u8; - v8u16 lsx_u16; - v4u32 lsx_u32; - v2u64 lsx_u64; - v4f32 lsx_f32; - v2f64 lsx_f64; - #endif -} simde__m128_private; - -#if defined(SIMDE_X86_SSE_NATIVE) - typedef __m128 simde__m128; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef float32x4_t simde__m128; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; -#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - typedef v4f32 simde__m128; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128_private simde__m128; -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - typedef simde__m128 __m128; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde__m128_from_private(simde__m128_private v) { - simde__m128 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128_private -simde__m128_to_private(simde__m128 v) { - simde__m128_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(float) - simde__m128_to_altivec_f32(simde__m128 value) { - simde__m128_private r_ = simde__m128_to_private(value); - return r_.altivec_f32; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { - simde__m128_private r_; - r_.altivec_f32 = value; - return simde__m128_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); -#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ - -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) -#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ - -enum { - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, - SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, - SIMDE_MM_ROUND_UP = _MM_ROUND_UP, - SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO - #else - SIMDE_MM_ROUND_NEAREST = 0x0000, - SIMDE_MM_ROUND_DOWN = 0x2000, - SIMDE_MM_ROUND_UP = 0x4000, - SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 - #endif -}; -#if defined(_MM_ROUND_MASK) -# define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK -#else -# define SIMDE_MM_ROUND_MASK (0x6000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK -#endif - -#if defined(_MM_FROUND_TO_NEAREST_INT) -# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT -# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF -# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF -# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO -# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION - -# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC -# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC -#else -# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 -# define 
SIMDE_MM_FROUND_TO_NEG_INF 0x01 -# define SIMDE_MM_FROUND_TO_POS_INF 0x02 -# define SIMDE_MM_FROUND_TO_ZERO 0x03 -# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 - -# define SIMDE_MM_FROUND_RAISE_EXC 0x00 -# define SIMDE_MM_FROUND_NO_EXC 0x08 -#endif - -#define SIMDE_MM_FROUND_NINT \ - (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_FLOOR \ - (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_CEIL \ - (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_TRUNC \ - (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_RINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_NEARBYINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) - -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) -# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT -# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF -# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF -# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO -# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION -# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC -# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT -# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR -# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL -# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC -# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT -# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT -#endif - -#if defined(_MM_EXCEPT_INVALID) -# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID -#else -# define SIMDE_MM_EXCEPT_INVALID (0x0001) -#endif -#if defined(_MM_EXCEPT_DENORM) -# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM -#else -# define SIMDE_MM_EXCEPT_DENORM (0x0002) -#endif -#if defined(_MM_EXCEPT_DIV_ZERO) -# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO -#else -# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) -#endif -#if defined(_MM_EXCEPT_OVERFLOW) -# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW -#else -# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) -#endif -#if defined(_MM_EXCEPT_UNDERFLOW) -# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW -#else -# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) -#endif -#if defined(_MM_EXCEPT_INEXACT) -# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT -#else -# define SIMDE_MM_EXCEPT_INEXACT (0x0020) -#endif -#if defined(_MM_EXCEPT_MASK) -# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK -#else -# define SIMDE_MM_EXCEPT_MASK \ - (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ - SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ - SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID - #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM - #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO - #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW - #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW - #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT - #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK -#endif - -#if defined(_MM_MASK_INVALID) -# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID -#else -# define SIMDE_MM_MASK_INVALID (0x0080) -#endif -#if defined(_MM_MASK_DENORM) -# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM -#else -# define SIMDE_MM_MASK_DENORM (0x0100) -#endif -#if defined(_MM_MASK_DIV_ZERO) -# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO -#else -# define 
SIMDE_MM_MASK_DIV_ZERO (0x0200) -#endif -#if defined(_MM_MASK_OVERFLOW) -# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW -#else -# define SIMDE_MM_MASK_OVERFLOW (0x0400) -#endif -#if defined(_MM_MASK_UNDERFLOW) -# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW -#else -# define SIMDE_MM_MASK_UNDERFLOW (0x0800) -#endif -#if defined(_MM_MASK_INEXACT) -# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT -#else -# define SIMDE_MM_MASK_INEXACT (0x1000) -#endif -#if defined(_MM_MASK_MASK) -# define SIMDE_MM_MASK_MASK _MM_MASK_MASK -#else -# define SIMDE_MM_MASK_MASK \ - (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ - SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ - SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID - #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM - #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO - #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW - #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW - #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT - #define _MM_MASK_MASK SIMDE_MM_MASK_MASK -#endif - -#if defined(_MM_FLUSH_ZERO_MASK) -# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK -#else -# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_ON) -# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON -#else -# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_OFF) -# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF -#else -# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK - #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON - #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_ROUNDING_MODE(void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _MM_GET_ROUNDING_MODE(); - #elif defined(SIMDE_HAVE_FENV_H) - unsigned int vfe_mode; - - switch (fegetround()) { - #if defined(FE_TONEAREST) - case FE_TONEAREST: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case FE_TOWARDZERO: - vfe_mode = SIMDE_MM_ROUND_DOWN; - break; - #endif - - #if defined(FE_UPWARD) - case FE_UPWARD: - vfe_mode = SIMDE_MM_ROUND_UP; - break; - #endif - - #if defined(FE_DOWNWARD) - case FE_DOWNWARD: - vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; - break; - #endif - - default: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - } - - return vfe_mode; - #else - return SIMDE_MM_ROUND_NEAREST; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_ROUNDING_MODE(a); - #elif defined(SIMDE_HAVE_FENV_H) - int fe_mode = FE_TONEAREST; - - switch (a) { - #if defined(FE_TONEAREST) - case SIMDE_MM_ROUND_NEAREST: - fe_mode = FE_TONEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case SIMDE_MM_ROUND_TOWARD_ZERO: - fe_mode = FE_TOWARDZERO; - break; - #endif - - #if defined(FE_DOWNWARD) - case SIMDE_MM_ROUND_DOWN: - fe_mode = FE_DOWNWARD; - break; - #endif - - #if defined(FE_UPWARD) - case SIMDE_MM_ROUND_UP: - fe_mode = FE_UPWARD; - break; - #endif - - default: - return; - } - - fesetround(fe_mode); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. 
[Deletion of the vendored SIMDe (SIMD Everywhere) SSE compatibility header continues here and is elided; the hunk removes the bundled third-party code wholesale. The dropped portion covers the portable `_mm_*` emulation layer: `simde_mm_round_ps` with its rounding-mode dispatch, `simde_mm_set_ps` / `simde_mm_set_ps1` / `simde_mm_move_ss`, the element-wise arithmetic and bitwise helpers (`add`, `and`, `andnot`, `or`, `xor`, `not`, `select`, `abs`, `copysign`, `avg`), the full `cmp*_ps` / `cmp*_ss` and `comi*_ss` comparison families (including the NaN-aware `cmpord` / `cmpunord` variants), the `__m64` / `__m128` / scalar conversion helpers (`cvt_pi2ps`, `cvt_ps2pi`, `cvtsi32_ss`, `cvtss_f32`, the truncating `cvtt*` forms, and related), and the start of the division helpers (`div_ps` / `div_ss`). Each deleted function follows the same shape: use the native x86 intrinsic when available, otherwise fall back to NEON, AltiVec/z vector, WASM SIMD128, LoongArch LSX, or a plain scalar loop.]
defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = a_.f32[0] / b_.f32[0]; - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_mm_extract_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private a_ = simde__m64_to_private(a); - return a_.i16[imm8]; -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) -#endif -#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) -# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private - a_ = simde__m64_to_private(a); - - a_.i16[imm8] = i; - - return simde__m64_from_private(a_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) -#endif -#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps(mem_addr); -#else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); - #endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load1_ps (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps1(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_dup_f32(mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); - #else - r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ss (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ss(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); - #else - r_.f32[0] = *mem_addr; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); - #else - simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -/* The SSE documentation says that there are no alignment requirements - for mem_addr. Unfortunately they used the __m64 type for the argument - which is supposed to be 8-byte aligned, so some compilers (like clang - with -Wcast-align) will generate a warning if you try to cast, say, - a simde_float32* to a simde__m64* for this function. - - I think the choice of argument type is unfortunate, but I do think we - need to stick to it here. 
If there is demand I can always add something - like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vld1_f32( - HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); - #else - simde__m64_private b_; - simde_memcpy(&b_, mem_addr, sizeof(b_)); - r_.i32[0] = b_.i32[0]; - r_.i32[1] = b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadr_ps(mem_addr); - #else - simde__m128_private - r_, - v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrev64q_f32(v_.neon_f32); - r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_reve(v_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); - #else - r_.f32[0] = v_.f32[3]; - r_.f32[1] = v_.f32[2]; - r_.f32[2] = v_.f32[1]; - r_.f32[3] = v_.f32[0]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadu_ps(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m64_private - a_ = simde__m64_to_private(a), - mask_ = simde__m64_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) - if (mask_.i8[i] < 0) - mem_addr[i] = a_.i8[i]; - #endif -} -#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) -# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) - r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); - #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) - r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) -# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) -# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - #if defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); - #else - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); - #endif - #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); - r_.f32 = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.f32), - ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | - (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) - ) - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) -# define _m_pminub(a, b) simde_mm_min_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movehl_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vzip2q_u64(b_.neon_u64, a_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a32 = vget_high_f32(a_.neon_f32); - float32x2_t b32 = vget_high_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(b32, a32); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergel(b_.altivec_i64, a_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); - #else - r_.f32[0] = b_.f32[2]; - r_.f32[1] = b_.f32[3]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movelh_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = 
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
- [... remainder of the vendored simde/x86/sse.h removed in this diff: simde_mm_shuffle_pi16 / simde_mm_shuffle_ps, sqrt_ps/sqrt_ss, the store/storeu/storer/stream family, sub_ps/sub_ss, the ucomi* scalar comparisons, unpackhi_ps/unpacklo_ps, and SIMDE_MM_TRANSPOSE4_PS, ending at the "End simde/x86/sse.h" marker; third-party auto-generated header text omitted ...]
- [... start of the vendored SSE2 section removed in this diff: the simde__m128i_private / simde__m128d_private unions and typedefs, NEON/AltiVec/WASM/LSX conversion helpers, set_pd/set1_pd, the abs/not/select/broadcastlow helpers, add/adds/and/andnot/xor/avg, setzero_si128, and the bslli_si128/bsrli_si128 byte-shift macros; third-party auto-generated header text omitted ...]
imm8 : (imm8 & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.wasm_v128 = \ - wasm_i8x16_shuffle( \ - simde_tmp_z_.wasm_v128, \ - simde_tmp_a_.wasm_v128, \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.i8 = \ - SIMDE_SHUFFLE_VECTOR_(8, 16, \ - simde_tmp_z_.i8, \ - (simde_tmp_a_).i8, \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#endif -#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) - #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_clflush (void const* p) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_clflush(p); - #else - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_clflush(p) simde_mm_clflush(p) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] == b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] >= b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] > b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] <= b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] < b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), 
- b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] != b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { - simde__m128d_private - r_, - dest_ = simde__m128d_to_private(dest), - src_ = simde__m128d_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); - #else - simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); - uint64_t u64_nz; - simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); - uint64x2_t sign_pos = vdupq_n_u64(u64_nz); - #endif - r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); - #else - r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); - #endif - #elif defined(simde_math_copysign) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); - } - #else - simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); - return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { - return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_castpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_ps(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f32_f64(a); - #else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castpd_si128 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_si128(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_s64_f64(a); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_castps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_pd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_f32(a); - #else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castps_pd(a) simde_mm_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castps_si128 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_si128(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif 
-} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castps_si128(a) simde_mm_castps_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_castsi128_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_pd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_s64(a); - #else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_castsi128_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_ps(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); - #else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = (a_.i16 == b_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_MIPS_MSA_NATIVE) - r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpgt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpge_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpge_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpngt_pd(a, b); - #else - return simde_mm_cmple_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpngt_sd(a, b); - #else - return simde_mm_cmple_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnge_pd(a, b); - #else - return simde_mm_cmplt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpnge_sd(a, b); - #else - return simde_mm_cmplt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_pd(a, b); - #else - return simde_mm_cmpge_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_sd(a, b); - #else - return simde_mm_cmpge_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnle_pd(a, b); - #else - return simde_mm_cmpgt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return 
_mm_cmpnle_sd(a, b); - #else - return simde_mm_cmpgt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vandq_u64(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm_cvtsd_f64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cvtsd_f64(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); - #else - return a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_pd(a); - #else - simde__m128d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtepi32_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_ps(a); - #else - simde__m128_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #pragma clang diagnostic ignored "-Wc11-extensions" - #endif - r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = simde_math_round(a_.f64[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) - return _mm_cvtpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvtpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) - float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; - r_.f32 = - __builtin_shufflevector( - __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, - 0, 1, 2, 3 - ); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); - r_.f32[2] = SIMDE_FLOAT32_C(0.0); - r_.f32[3] = SIMDE_FLOAT32_C(0.0); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtpi32_pd (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_pd(a); - #else - simde__m128d_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) - a_ = simde__m128_to_private(a); - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - r_.wasm_v128 = 
[Vendored SIMDE code removed: the lines elided here are part of the bundled SIMDE SSE2 emulation header being deleted by this diff. They contain the upstream portable implementations (native SSE2, ARM NEON, WASM SIMD128, POWER AltiVec, and scalar fallbacks) of the intrinsics from _mm_cvtps_epi32 through _mm_set_epi32, covering float/int conversions, loads, extract/insert, min/max, multiply, movemask, pack/packus, pause, sad, and set operations, together with their SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES aliases. Only upstream SIMDE code appears in this hunk; no project-authored code is touched.]
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); - #else - r_.i32[0] = e0; - r_.i32[1] = e1; - r_.i32[2] = e2; - r_.i32[3] = e3; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si32 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ - HEDLEY_GCC_VERSION_CHECK(12,1,0)) - return _mm_loadu_si32(mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m128i_private r_; - r_.neon_i32 = vsetq_lane_s32(* HEDLEY_REINTERPRET_CAST(const int32_t *, mem_addr), vdupq_n_s32(0), 0); - return simde__m128i_from_private(r_); - #else - int32_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_mm_cvtsi32_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_epi64(e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); - #else - r_.m64[0] = e0; - r_.m64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set_epi64x (int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set_epi64x(e1, e0); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; - r_.neon_i64 = vld1q_s64(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make(e0, e1); - #else - r_.i64[0] = e0; - r_.i64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si64 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - return _mm_loadu_si64(mem_addr); - #else - int64_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_mm_cvtsi64_si128(val); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, - uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, - uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi8( - HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), - HEDLEY_STATIC_CAST(char, e11), 
HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), - HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), - HEDLEY_STATIC_CAST(char, e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { - e0, e1, e2, e3, - e4, e5, e6, e7, - e8, e9, e10, e11, - e12, e13, e14, e15}; - r_.neon_u8 = vld1q_u8(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); - #else - r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; - r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; - r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; - r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, - uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi16( - HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), - HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u16 = vld1q_u16(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); - #else - r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; - r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi32( - HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; - r_.neon_u32 = vld1q_u32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); - #else - r_.u32[0] = e0; - r_.u32[1] = e1; - r_.u32[2] = e2; - r_.u32[3] = e3; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; - r_.neon_u64 = vld1q_u64(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_make(e0, e1); - #else - r_.u64[0] = e0; - r_.u64[1] = e1; - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_sd (simde_float64 a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_sd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); - #else - return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_sd(a) simde_mm_set_sd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi8(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vdupq_n_s8(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi16 (int16_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi16(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vdupq_n_s16(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi32 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi32(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vdupq_n_s32(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi64x (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) - return _mm_set1_epi64x(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vdupq_n_s64(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_splat(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_epi64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - return simde_mm_set1_epi64x(a_.i64[0]); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu8 (uint8_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); - #else - return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu16 (uint16_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); - #else - return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu32 (uint32_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); - #else - return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu64 (uint64_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); - #else - return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_epi8( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - 
#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi32(e3, e2, e1, e0); - #else - return simde_mm_set_epi32(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_epi64(e1, e0); - #else - return simde_mm_set_epi64(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_pd(e1, e0); - #else - return simde_mm_set_pd(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setzero_pd (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_pd(); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); - #else - return simde_mm_castsi128_pd(simde_mm_setzero_si128()); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_pd() simde_mm_setzero_pd() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_undefined_pd (void) { - simde__m128d_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_pd(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_pd() simde_mm_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_undefined_si128 (void) { - simde__m128i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_si128(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_si128() (simde_mm_undefined_si128()) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_setone_pd (void) { - return simde_mm_castps_pd(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_setone_si128 (void) { - return simde_mm_castps_si128(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = 
simde__m128i_to_private(a); \ - simde__m128i_from_wasm_v128( \ - wasm_i32x4_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3)); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_epi32(a, imm8) \ - (__extension__ ({ \ - const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ - int32x4_t simde_mm_shuffle_epi32_r_; \ - simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ - vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - (simde_tmp_a_).i32, \ - (simde_tmp_a_).i32, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; - r_.f64[1] = ((imm8 & 2) == 0) ? 
b_.f64[0] : b_.f64[1]; - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ - simde__m128d_from_private((simde__m128d_private) { .f64 = \ - SIMDE_SHUFFLE_VECTOR_(64, 16, \ - simde__m128d_to_private(a).f64, \ - simde__m128d_to_private(b).f64, \ - (((imm8) ) & 1), \ - (((imm8) >> 1) & 1) + 2) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[i]; - } - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflehi_epi16(a, imm8) \ - (__extension__ ({ \ - int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ - simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ - wasm_i16x8_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = 
simde__m128i_to_private(a); - - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; - } - SIMDE_VECTORIZE - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) \ - simde__m128i_from_wasm_v128( \ - wasm_i16x8_shuffle( \ - simde__m128i_to_wasm_v128((a)), \ - wasm_i16x8_splat(0), \ - (((imm8) & 0x03) ), \ - (((imm8) & 0x0c) >> 2), \ - (((imm8) & 0x30) >> 4), \ - (((imm8) & 0xc0) >> 6), \ - 4, 5, 6, 7)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflelo_epi16(a, imm8) \ - (__extension__({ \ - int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ - simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), \ - 4, 5, 6, 7) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 15) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = (a_.u16 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? 
wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 31) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = (a_.u32 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 63) - return simde_mm_setzero_si128(); - - const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] << s; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsqrtq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_sqrt(a_.altivec_f64); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_sqrt) - r_.f64[0] = simde_math_sqrt(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~15) ? 15 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~31) ? 
31 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sra_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) - return _mm_sra_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - (((imm8) <= 0) ? 
\ - (a) : \ - simde__m128i_from_neon_i16( \ - ((imm8) > 15) ? \ - vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ - vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i32( \ - ((imm8) > 31) ? \ - vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ - vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sl(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 63))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i64( \ - ((imm8) > 63) ? \ - vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ - vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u16( \ - ((imm8) > 15) ? \ - vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ - vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u32( \ - ((imm8) > 31) ? \ - vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ - vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sr(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); - #else - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u64( \ - ((imm8) > 63) ? \ - vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ - vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store1_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); - #else - mem_addr[0] = a_.f64[0]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) - #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_sd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); - simde_memcpy(mem_addr, &v, sizeof(v)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); - simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde_float64 v = a_.f64[0]; - simde_memcpy(mem_addr, &v, sizeof(simde_float64)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void - simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeh_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - *mem_addr = a_.f64[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int64_t tmp; - - /* memcpy to prevent aliasing, tmp because we can't take the - * address of a vector element. */ - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - tmp = vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - tmp = vec_extract(a_.altivec_i64, 0); - #else - tmp = a_.i64[0]; - #endif - - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_pd(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 tmp; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - tmp = vgetq_lane_f64(a_.neon_f64, 0); - #else - tmp = a_.f64[0]; - #endif - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storer_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #else - mem_addr[0] = a_.f64[1]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si16(mem_addr, a); - #else - int16_t val = simde_x_mm_cvtsi128_si16(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si32(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); - #else - int32_t val = simde_mm_cvtsi128_si32(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si64(mem_addr, a); - #else - int64_t val = simde_mm_cvtsi128_si64(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_pd(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_si128(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-void -simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_si32(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_s32(mem_addr, vdupq_n_s32(a), 0); - #else - *mem_addr = a; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) - _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s64(mem_addr, vdup_n_s64(a)); - #else - *mem_addr = a; - #endif -} -#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) - #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] - b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m64 -simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] - b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] == b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] == b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] >= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] >= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > 
wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] > b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] > b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] <= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] <= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] < b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] < b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] != b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] != b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_lfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_lfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_lfence() simde_mm_lfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_mfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_mfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mfence() simde_mm_mfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_high_s16(a_.neon_i16); - int16x4_t b1 = vget_high_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi16(a, b) 
simde_mm_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_high_s32(a_.neon_i32); - int32x2_t b1 = vget_high_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_h = vget_high_s64(a_.neon_i64); - int64x1_t b_h = vget_high_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_h, b_h); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi8 (simde__m128i a, 
simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i]; - r_.i8[(i * 2) + 1] = b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_low_s16(a_.neon_i16); - int16x4_t b1 = vget_low_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i]; - r_.i16[(i * 2) + 1] = b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_low_s32(a_.neon_i32); - int32x2_t b1 = vget_low_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i]; - r_.i32[(i * 2) + 1] = b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_l = vget_low_s64(a_.neon_i64); - int64x1_t b_l = vget_low_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_l, b_l); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i]; - r_.i64[(i * 2) + 1] = b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i]; - r_.f64[(i * 2) + 1] = b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_negate_pd(simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - r_.altivec_f64 = vec_neg(a_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vnegq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); - #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_not_si128 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_ternarylogic_epi32(a, a, a, 0x55); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/sse2.h :: */ diff --git a/src/simde/x86/sse3.h b/src/simde/x86/sse3.h deleted file mode 100644 index 8fab0bcd7..000000000 --- a/src/simde/x86/sse3.h +++ /dev/null @@ -1,24333 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSE3_H) -#define SIMDE_X86_SSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - * 2017 Hasindu Gamaarachchi - * 2018 Jeff Daily - */ - -#if !defined(SIMDE_X86_SSE2_H) -#define SIMDE_X86_SSE2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. 
Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - */ - -#if !defined(SIMDE_X86_SSE_H) -#define SIMDE_X86_SSE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/mmx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_MMX_H) -#define SIMDE_X86_MMX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-common.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_COMMON_H) -#define SIMDE_COMMON_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/hedley.h :: */ -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . 
- * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) -#if defined(HEDLEY_VERSION) -# undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 16 - -#if defined(HEDLEY_STRINGIFY_EX) -# undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -# undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -# undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(HEDLEY_CONCAT) -# undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) - -#if defined(HEDLEY_CONCAT3_EX) -# undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(HEDLEY_CONCAT3) -# undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(HEDLEY_VERSION_ENCODE) -# undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -# undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -# undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -# undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -# undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -# undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -# undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -# undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(HEDLEY_INTEL_VERSION) -# undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -# undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -# undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) -# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -# undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -# undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -# undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -# undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -# undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
[Deletion of the vendored simde/hedley.h continues here: the remaining HEDLEY_*_VERSION / HEDLEY_*_VERSION_CHECK compiler-detection macros (Emscripten, ARM, IBM XL, the TI toolchains, Cray, IAR, TinyCC, DMC, CompCert, Pelles, MCST LCC, GCC), the __has_attribute / __has_cpp_attribute / __has_builtin / __has_feature / __has_extension / __has_declspec_attribute / __has_warning wrappers, HEDLEY_PRAGMA plus the HEDLEY_DIAGNOSTIC_PUSH/POP and HEDLEY_DIAGNOSTIC_DISABLE_* suppression macros, and the portability shims (HEDLEY_CONST_CAST / REINTERPRET_CAST / STATIC_CAST / CPP_CAST, HEDLEY_DEPRECATED, HEDLEY_UNAVAILABLE, HEDLEY_WARN_UNUSED_RESULT, HEDLEY_SENTINEL, HEDLEY_NO_RETURN, HEDLEY_NO_ESCAPE, HEDLEY_UNREACHABLE / ASSUME, HEDLEY_NON_NULL, HEDLEY_PRINTF_FORMAT, HEDLEY_CONSTEXPR, HEDLEY_PREDICT / LIKELY / UNLIKELY, HEDLEY_MALLOC / PURE / CONST, HEDLEY_RESTRICT / INLINE / ALWAYS_INLINE / NEVER_INLINE, HEDLEY_PRIVATE / PUBLIC / IMPORT, HEDLEY_NO_THROW, HEDLEY_FALL_THROUGH, HEDLEY_RETURNS_NON_NULL, HEDLEY_ARRAY_PARAM, HEDLEY_IS_CONSTANT / REQUIRE_CONSTEXPR, HEDLEY_BEGIN/END_C_DECLS, HEDLEY_STATIC_ASSERT, HEDLEY_NULL, HEDLEY_MESSAGE / WARNING, HEDLEY_REQUIRE, HEDLEY_FLAGS, HEDLEY_EMPTY_BASES, and the deprecated HEDLEY_CLANG_HAS_* aliases), running through the "/* :: End simde/hedley.h :: */" marker and into the start of the SIMDe amalgamation (the SIMDE_VERSION 0.8.0 defines and its "AUTOMATICALLY GENERATED FILE, DO NOT MODIFY" banner).]
simde/simde-detect-clang.h :: */ -/* Detect Clang Version - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -/* This file was originally part of SIMDe - * (). You're free to do with it as - * you please, but I do have a few small requests: - * - * * If you make improvements, please submit them back to SIMDe - * (at ) so others can - * benefit from them. - * * Please keep a link to SIMDe intact so people know where to submit - * improvements. - * * If you expose it publicly, please change the SIMDE_ prefix to - * something specific to your project. - * - * The version numbers clang exposes (in the ___clang_major__, - * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. - * Vendors such as Apple will define these values to their version - * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but - * __clang_major__ and __clang_minor__ are defined to 4 and 0 - * respectively, instead of 3 and 1. - * - * The solution is *usually* to use clang's feature detection macros - * () - * to determine if the feature you're interested in is available. This - * generally works well, and it should probably be the first thing you - * try. Unfortunately, it's not possible to check for everything. In - * particular, compiler bugs. - * - * This file just uses the feature checking macros to detect features - * added in specific versions of clang to identify which version of - * clang the compiler is based on. - * - * Right now it only goes back to 3.6, but I'm happy to accept patches - * to go back further. And, of course, newer versions are welcome if - * they're not already present, and if you find a way to detect a point - * release that would be great, too! - */ - -#if !defined(SIMDE_DETECT_CLANG_H) -#define SIMDE_DETECT_CLANG_H 1 - -/* Attempt to detect the upstream clang version number. I usually only - * worry about major version numbers (at least for 4.0+), but if you - * need more resolution I'm happy to accept patches that are able to - * detect minor versions as well. That said, you'll probably have a - * hard time with detection since AFAIK most minor releases don't add - * anything we can detect. Updated based on - * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 - * - would welcome patches/updates there as well. 
- */ - -#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -# if __has_attribute(unsafe_buffer_usage) // no new warnings in 17.0 -# define SIMDE_DETECT_CLANG_VERSION 170000 -# elif __has_attribute(nouwtable) // no new warnings in 16.0 -# define SIMDE_DETECT_CLANG_VERSION 160000 -# elif __has_warning("-Warray-parameter") -# define SIMDE_DETECT_CLANG_VERSION 150000 -# elif __has_warning("-Wbitwise-instead-of-logical") -# define SIMDE_DETECT_CLANG_VERSION 140000 -# elif __has_warning("-Waix-compat") -# define SIMDE_DETECT_CLANG_VERSION 130000 -# elif __has_warning("-Wformat-insufficient-args") -# define SIMDE_DETECT_CLANG_VERSION 120000 -# elif __has_warning("-Wimplicit-const-int-float-conversion") -# define SIMDE_DETECT_CLANG_VERSION 110000 -# elif __has_warning("-Wmisleading-indentation") -# define SIMDE_DETECT_CLANG_VERSION 100000 -# elif defined(__FILE_NAME__) -# define SIMDE_DETECT_CLANG_VERSION 90000 -# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) -# define SIMDE_DETECT_CLANG_VERSION 80000 -// For reasons unknown, Xcode 10.3 (Apple LLVM version 10.0.1) is apparently -// based on Clang 7, but does not support the warning we test. -// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and -// https://trac.macports.org/wiki/XcodeVersionInfo. -# elif __has_warning("-Wc++98-compat-extra-semi") || \ - (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) -# define SIMDE_DETECT_CLANG_VERSION 70000 -# elif __has_warning("-Wpragma-pack") -# define SIMDE_DETECT_CLANG_VERSION 60000 -# elif __has_warning("-Wbitfield-enum-conversion") -# define SIMDE_DETECT_CLANG_VERSION 50000 -# elif __has_attribute(diagnose_if) -# define SIMDE_DETECT_CLANG_VERSION 40000 -# elif __has_warning("-Wcomma") -# define SIMDE_DETECT_CLANG_VERSION 39000 -# elif __has_warning("-Wdouble-promotion") -# define SIMDE_DETECT_CLANG_VERSION 38000 -# elif __has_warning("-Wshift-negative-value") -# define SIMDE_DETECT_CLANG_VERSION 37000 -# elif __has_warning("-Wambiguous-ellipsis") -# define SIMDE_DETECT_CLANG_VERSION 36000 -# else -# define SIMDE_DETECT_CLANG_VERSION 1 -# endif -#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ - -/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty - * straightforward; it returns true if the compiler is a derivative - * of clang >= the specified version. - * - * Since this file is often (primarily?) useful for working around bugs - * it is also helpful to have a macro which returns true if only if the - * compiler is a version of clang *older* than the specified version to - * make it a bit easier to ifdef regions to add code for older versions, - * such as pragmas to disable a specific warning. 
*/ - -#if defined(SIMDE_DETECT_CLANG_VERSION) -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) -#else -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) -#endif - -#endif /* !defined(SIMDE_DETECT_CLANG_H) */ -/* :: End simde/simde-detect-clang.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-arch.h :: */ -/* Architecture detection - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - * Different compilers define different preprocessor macros for the - * same architecture. This is an attempt to provide a single - * interface which is usable on any compiler. - * - * In general, a macro named SIMDE_ARCH_* is defined for each - * architecture the CPU supports. When there are multiple possible - * versions, we try to define the macro to the target version. For - * example, if you want to check for i586+, you could do something - * like: - * - * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) - * ... - * #endif - * - * You could also just check that SIMDE_ARCH_X86 >= 5 without checking - * if it's defined first, but some compilers may emit a warning about - * an undefined macro being used (e.g., GCC with -Wundef). - * - * This was originally created for SIMDe - * (hence the prefix), but this - * header has no dependencies and may be used anywhere. It is - * originally based on information from - * , though it - * has been enhanced with additional information. - * - * If you improve this file, or find a bug, please file the issue at - * . If you copy this into - * your project, even if you change the prefix, please keep the links - * to SIMDe intact so others know where to report issues, submit - * enhancements, and find the latest version. 
*/ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -# if defined(__alpha_ev6__) -# define SIMDE_ARCH_ALPHA 6 -# elif defined(__alpha_ev5__) -# define SIMDE_ARCH_ALPHA 5 -# elif defined(__alpha_ev4__) -# define SIMDE_ARCH_ALPHA 4 -# else -# define SIMDE_ARCH_ALPHA 1 -# endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -# define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -# define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -# if !defined(_M_ARM64EC) -# define SIMDE_ARCH_AMD64 1000 -# endif -#endif - -/* ARM - */ -#if defined(__ARM_ARCH) -# if __ARM_ARCH > 100 -# define SIMDE_ARCH_ARM (__ARM_ARCH) -# else -# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) -# endif -#elif defined(_M_ARM) -# if _M_ARM > 100 -# define SIMDE_ARCH_ARM (_M_ARM) -# else -# define SIMDE_ARCH_ARM (_M_ARM * 100) -# endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_ARM 800 -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -# define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) -#else -# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -# define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -# elif defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -# endif -#endif -#if defined(__ARM_FEATURE_SVE) -# define SIMDE_ARCH_ARM_SVE -#endif -#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA -# define SIMDE_ARCH_ARM_FMA -#endif -#if defined(__ARM_FEATURE_CRYPTO) -# define SIMDE_ARCH_ARM_CRYPTO -#endif -#if defined(__ARM_FEATURE_QRDMX) -# define SIMDE_ARCH_ARM_QRDMX -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -# define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -# define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) -# define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -# define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -# define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -# define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -# define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -# define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -# define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -# define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -# define 
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
*/ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. 
*/ -#if \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ -#endif - -/* This warning needs a lot of work. It is triggered if all you do is - * pass the value to memcpy/__builtin_memcpy, or if you initialize a - * member of the union, even if that member takes up the entire union. - * Last tested with clang-10, hopefully things will improve in the - * future; if clang fixes this I'd love to enable it. */ -#if \ - HEDLEY_HAS_WARNING("-Wconditional-uninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ -#endif - -/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which - * will is false. However, SIMDe uses these operations exclusively - * for things like _mm_cmpeq_ps, for which we really do want to check - * for equality (or inequality). - * - * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro - * which just wraps a check in some code do disable this diagnostic I'd - * be happy to accept it. */ -#if \ - HEDLEY_HAS_WARNING("-Wfloat-equal") || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ -#endif - -/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. - * If Hedley can't find an implementation it will preprocess to - * nothing, which means there will be a trailing semi-colon. */ -#if HEDLEY_HAS_WARNING("-Wextra-semi") - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") -#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ -#endif - -/* We do use a few variadic macros, which technically aren't available - * until C99 and C++11, but every compiler I'm aware of has supported - * them for much longer. That said, usage is isolated to the test - * suite and compilers known to support them. */ -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) - #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#endif - -/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro - * before we can access certain SIMD intrinsics, but this diagnostic - * warns about it being a reserved name. It is a reserved name, but - * it's reserved for the compiler and we are using it to convey - * information to the compiler. - * - * This is also used when enabling native aliases since we don't get to - * choose the macro names. 
*/ -#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#endif - -/* Similar to above; types like simde__m128i are reserved due to the - * double underscore, but we didn't choose them, Intel did. */ -#if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ -#endif - -/* clang 3.8 warns about the packed attribute being unnecessary when - * used in the _mm_loadu_* functions. That *may* be true for version - * 3.8, but for later versions it is crucial in order to make unaligned - * access safe. */ -#if HEDLEY_HAS_WARNING("-Wpacked") - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ -#endif - -/* Triggered when assigning a float to a double implicitly. We use - * explicit casts in SIMDe, this is only used in the test suite. */ -#if HEDLEY_HAS_WARNING("-Wdouble-promotion") - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -/* Several compilers treat conformant array parameters as VLAs. We - * test to make sure we're in C mode (C++ doesn't support CAPs), and - * that the version of the standard supports CAPs. We also reject - * some buggy compilers like MSVC (the logic is in Hedley if you want - * to take a look), but with certain warnings enabled some compilers - * still like to emit a diagnostic. */ -#if HEDLEY_HAS_WARNING("-Wvla") - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ -#endif - -/* If you add an unused attribute to a function and don't use it, clang - * may emit this. 
*/ -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpass-failed") - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpadded") - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ -#endif - -#if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ -#endif - -#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ -#endif - -/* clang will emit this warning when we use C99 extensions whan not in - * C99 mode, even though it does support this. In such cases we check - * the compiler and version first, so we know it's not a problem. */ -#if HEDLEY_HAS_WARNING("-Wc99-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -#endif - -/* Similar problm as above; we rely on some basic C99 support, but clang - * has started warning obut this even in C17 mode with -Weverything. */ -#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ -#endif - -/* https://github.com/simd-everywhere/simde/issues/277 */ -#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ -#endif - -/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS - * to silence, but you have to do that before including anything and - * that would require reordering includes. */ -#if defined(_MSC_VER) - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ -#endif - -/* Some compilers, such as clang, may use `long long` for 64-bit - * integers, but `long long` triggers a diagnostic with - * -Wc++98-compat-pedantic which says 'long long' is incompatible with - * C++98. 
*/ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ - _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ -#endif - -/* Some problem as above */ -#if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ -#endif - -/* emscripten emits this whenever stdin/stdout/stderr is used in a - * macro. */ -#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ -#endif - -/* Clang uses C11 generic selections to implement some AltiVec - * functions, which triggers this diagnostic when not compiling - * in C11 mode */ -#if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ -#endif - -/* Clang sometimes triggers this warning in macros in the AltiVec and - * NEON headers, or due to missing functions. */ -#if HEDLEY_HAS_WARNING("-Wvector-conversion") - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") - /* For NEON, the situation with -Wvector-conversion in clang < 10 is - * bad enough that we just disable the warning altogether. On x86, - * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ - #if \ - (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ - SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ -#endif - -/* Prior to 5.0, clang didn't support disabling diagnostics in - * statement exprs. As a result, some macros we use don't - * properly silence warnings. 
*/ -#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ -#endif - -/* SLEEF triggers this a *lot* in their headers */ -#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#endif - -/* GCC emits this under some circumstances when using __int128 */ -#if HEDLEY_GCC_VERSION_CHECK(4,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -#endif - -/* MSVC doesn't like (__assume(0), code) and will warn about code being - * unreachable, but we want it there because not all compilers - * understand the unreachable macro and will complain if it is missing. - * I'm planning on adding a new macro to Hedley to handle this a bit - * more elegantly, but until then... */ -#if defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) -#elif defined(__clang__) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ -#endif - -/* This is a false positive from GCC in a few places. */ -#if HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif - -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#else - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ -#endif - -/* Some native functions on E2K with instruction set < v6 are declared - * as deprecated due to inefficiency. Still they are more efficient - * than SIMDe implementation. So we're using them, and switching off - * these deprecation warnings. 
[... deletion hunk of the vendored SIMDe amalgamated headers continues here: the remainder of simde/simde-diagnostic.h, all of simde/simde-features.h (x86/ARM/POWER/WASM/z-Architecture/LoongArch feature-detection, natural-vector-size, and native-alias macros), and the opening portion of simde/simde-math.h (libm/builtin dispatch macros through simde_math_cdfnorm); auto-generated third-party contents omitted ...]
- static const double a3 = 1.421413741; - static const double a4 = -1.453152027; - static const double a5 = 1.061405429; - static const double p = 0.3275911; - - const int sign = x < 0; - x = simde_math_fabs(x) / simde_math_sqrt(2.0); - - /* A&S formula 7.1.26 */ - double t = 1.0 / (1.0 + p * x); - double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); - - return 0.5 * (1.0 + (sign ? -y : y)); - } - #define simde_math_cdfnorm simde_math_cdfnorm -#endif - -#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) - static HEDLEY_INLINE - float - simde_math_cdfnormf(float x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const float a1 = 0.254829592f; - static const float a2 = -0.284496736f; - static const float a3 = 1.421413741f; - static const float a4 = -1.453152027f; - static const float a5 = 1.061405429f; - static const float p = 0.3275911f; - - const int sign = x < 0; - x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); - - /* A&S formula 7.1.26 */ - float t = 1.0f / (1.0f + p * x); - float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); - - return 0.5f * (1.0f + (sign ? -y : y)); - } - #define simde_math_cdfnormf simde_math_cdfnormf -#endif - -#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) - /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ - static HEDLEY_INLINE - double - simde_math_cdfnorminv(double p) { - static const double a[6] = { - -3.969683028665376e+01, - 2.209460984245205e+02, - -2.759285104469687e+02, - 1.383577518672690e+02, - -3.066479806614716e+01, - 2.506628277459239e+00 - }; - - static const double b[5] = { - -5.447609879822406e+01, - 1.615858368580409e+02, - -1.556989798598866e+02, - 6.680131188771972e+01, - -1.328068155288572e+01 - }; - - static const double c[6] = { - -7.784894002430293e-03, - -3.223964580411365e-01, - -2.400758277161838e+00, - -2.549732539343734e+00, - 4.374664141464968e+00, - 2.938163982698783e+00 - }; - - static const double d[4] = { - 7.784695709041462e-03, - 3.224671290700398e-01, - 2.445134137142996e+00, - 3.754408661907416e+00 - }; - - static const double low = 0.02425; - static const double high = 0.97575; - double q, r; - - if (p < 0 || p > 1) { - return 0.0; - } else if (p == 0) { - return -SIMDE_MATH_INFINITY; - } else if (p == 1) { - return SIMDE_MATH_INFINITY; - } else if (p < low) { - q = simde_math_sqrt(-2.0 * simde_math_log(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } -} -#define simde_math_cdfnorminv simde_math_cdfnorminv -#endif - -#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_cdfnorminvf(float p) { - static const float a[6] = { - -3.969683028665376e+01f, - 2.209460984245205e+02f, - -2.759285104469687e+02f, - 1.383577518672690e+02f, - -3.066479806614716e+01f, - 
2.506628277459239e+00f - }; - static const float b[5] = { - -5.447609879822406e+01f, - 1.615858368580409e+02f, - -1.556989798598866e+02f, - 6.680131188771972e+01f, - -1.328068155288572e+01f - }; - static const float c[6] = { - -7.784894002430293e-03f, - -3.223964580411365e-01f, - -2.400758277161838e+00f, - -2.549732539343734e+00f, - 4.374664141464968e+00f, - 2.938163982698783e+00f - }; - static const float d[4] = { - 7.784695709041462e-03f, - 3.224671290700398e-01f, - 2.445134137142996e+00f, - 3.754408661907416e+00f - }; - static const float low = 0.02425f; - static const float high = 0.97575f; - float q, r; - - if (p < 0 || p > 1) { - return 0.0f; - } else if (p == 0) { - return -SIMDE_MATH_INFINITYF; - } else if (p == 1) { - return SIMDE_MATH_INFINITYF; - } else if (p < low) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5f; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } - } - #define simde_math_cdfnorminvf simde_math_cdfnorminvf -#endif - -#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfinv(double x) { - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c - * - * The original answer on SO uses a constant of 0.147, but in my - * testing 0.14829094707965850830078125 gives a lower average absolute error - * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). - * That said, if your goal is to minimize the *maximum* absolute - * error, 0.15449436008930206298828125 provides significantly better - * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); - } - #define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfinvf(float x) { - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); - } - #define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfcinv(double x) { - if(x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - static const double p[6] = { - 0.1550470003116, - 1.382719649631, - 0.690969348887, - -1.128081391617, - 0.680544246825, - -0.16444156791 - }; - static const double q[3] = { - 0.155024849822, - 1.385228141995, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - static const double p[4] = { - 0.00980456202915, - 0.363667889171, - 0.97302949837, - -0.5374947401 - }; - static const double q[3] = { - 0.00980451277802, - 0.363699971544, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } - } - - #define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfcinvf(float x) { - if(x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = { - 0.1550470003116f, - 1.382719649631f, - 0.690969348887f, - -1.128081391617f, - 0.680544246825f - -0.164441567910f - }; - static const float q[3] = { - 0.155024849822f, - 1.385228141995f, - 1.000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = { - 0.00980456202915f, - 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f - }; - static const float q[3] = { - 0.00980451277802f, - 0.36369997154400f, - 1.00000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; - } - } - - #define simde_math_erfcinvf simde_math_erfcinvf -#endif - -static HEDLEY_INLINE -double -simde_math_rad2deg(double radians) { - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE -float -simde_math_rad2degf(float radians) { - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE -double -simde_math_deg2rad(double degrees) { - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE -float -simde_math_deg2radf(float degrees) { - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE -int8_t -simde_math_adds_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); - #else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_adds_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_adds_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_adds_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_adds_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); - #else - uint8_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_adds_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); - #else - uint16_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_adds_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); - #else - uint32_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_adds_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); - #else - uint64_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -int8_t -simde_math_subs_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); - #else - uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - case 32: result = func_name(__VA_ARGS__, 32); break; \ - case 33: result = func_name(__VA_ARGS__, 33); break; \ - case 34: result = func_name(__VA_ARGS__, 34); break; \ - case 35: result = func_name(__VA_ARGS__, 35); break; \ - case 36: result = func_name(__VA_ARGS__, 36); break; \ - case 37: result = func_name(__VA_ARGS__, 37); break; \ - case 38: result = func_name(__VA_ARGS__, 38); break; \ - case 39: result = func_name(__VA_ARGS__, 39); break; \ - case 40: result = func_name(__VA_ARGS__, 40); break; \ - case 41: result = func_name(__VA_ARGS__, 41); break; \ - case 42: result = func_name(__VA_ARGS__, 42); break; \ - case 43: result = func_name(__VA_ARGS__, 43); break; \ - case 44: result = func_name(__VA_ARGS__, 44); break; \ - case 45: result = func_name(__VA_ARGS__, 45); break; \ - case 46: result = func_name(__VA_ARGS__, 46); break; \ - case 47: result = func_name(__VA_ARGS__, 47); break; \ - case 48: result = func_name(__VA_ARGS__, 48); break; \ - case 49: result = func_name(__VA_ARGS__, 49); break; \ - case 50: result = func_name(__VA_ARGS__, 50); break; \ - case 51: result = func_name(__VA_ARGS__, 51); break; \ - case 52: result = func_name(__VA_ARGS__, 52); break; \ - case 53: result = func_name(__VA_ARGS__, 53); break; \ - case 54: result = func_name(__VA_ARGS__, 54); break; \ - case 55: result = func_name(__VA_ARGS__, 55); break; \ - case 56: result = func_name(__VA_ARGS__, 56); break; \ - case 57: result = func_name(__VA_ARGS__, 57); break; \ - case 58: result = func_name(__VA_ARGS__, 58); break; \ - case 59: result = func_name(__VA_ARGS__, 59); break; \ - case 60: result = func_name(__VA_ARGS__, 60); break; \ - case 61: result = func_name(__VA_ARGS__, 61); break; \ - case 62: 
result = func_name(__VA_ARGS__, 62); break; \ - case 63: result = func_name(__VA_ARGS__, 63); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - case 32: func_name(__VA_ARGS__, 32); break; \ - case 33: func_name(__VA_ARGS__, 33); break; \ - case 34: func_name(__VA_ARGS__, 34); break; \ - case 35: func_name(__VA_ARGS__, 35); break; \ - case 36: func_name(__VA_ARGS__, 36); break; \ - case 37: func_name(__VA_ARGS__, 37); break; \ - case 38: func_name(__VA_ARGS__, 38); break; \ 
- case 39: func_name(__VA_ARGS__, 39); break; \ - case 40: func_name(__VA_ARGS__, 40); break; \ - case 41: func_name(__VA_ARGS__, 41); break; \ - case 42: func_name(__VA_ARGS__, 42); break; \ - case 43: func_name(__VA_ARGS__, 43); break; \ - case 44: func_name(__VA_ARGS__, 44); break; \ - case 45: func_name(__VA_ARGS__, 45); break; \ - case 46: func_name(__VA_ARGS__, 46); break; \ - case 47: func_name(__VA_ARGS__, 47); break; \ - case 48: func_name(__VA_ARGS__, 48); break; \ - case 49: func_name(__VA_ARGS__, 49); break; \ - case 50: func_name(__VA_ARGS__, 50); break; \ - case 51: func_name(__VA_ARGS__, 51); break; \ - case 52: func_name(__VA_ARGS__, 52); break; \ - case 53: func_name(__VA_ARGS__, 53); break; \ - case 54: func_name(__VA_ARGS__, 54); break; \ - case 55: func_name(__VA_ARGS__, 55); break; \ - case 56: func_name(__VA_ARGS__, 56); break; \ - case 57: func_name(__VA_ARGS__, 57); break; \ - case 58: func_name(__VA_ARGS__, 58); break; \ - case 59: func_name(__VA_ARGS__, 59); break; \ - case 60: func_name(__VA_ARGS__, 60); break; \ - case 61: func_name(__VA_ARGS__, 61); break; \ - case 62: func_name(__VA_ARGS__, 62); break; \ - case 63: func_name(__VA_ARGS__, 63); break; \ - default: default_case; break; \ - } \ - } while (0) - -HEDLEY_DIAGNOSTIC_POP - -#endif -/* :: End simde/simde-constify.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-align.h :: */ -/* Alignment - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - ********************************************************************** - * - * This is portability layer which should help iron out some - * differences across various compilers, as well as various verisons of - * C and C++. - * - * It was originally developed for SIMD Everywhere - * (), but since its only - * dependency is Hedley (, also CC0) - * it can easily be used in other projects, so please feel free to do - * so. - * - * If you do use this in your project, please keep a link to SIMDe in - * your code to remind you where to report any bugs and/or check for - * updated versions. - * - * # API Overview - * - * The API has several parts, and most macros have a few variations. - * There are APIs for declaring aligned fields/variables, optimization - * hints, and run-time alignment checks. - * - * Briefly, macros ending with "_TO" take numeric values and are great - * when you know the value you would like to use. Macros ending with - * "_LIKE", on the other hand, accept a type and are used when you want - * to use the alignment of a type instead of hardcoding a value. - * - * Documentation for each section of the API is inline. - * - * True to form, MSVC is the main problem and imposes several - * limitations on the effectiveness of the APIs. Detailed descriptions - * of the limitations of each macro are inline, but in general: - * - * * On C11+ or C++11+ code written using this API will work. The - * ASSUME macros may or may not generate a hint to the compiler, but - * that is only an optimization issue and will not actually cause - * failures. - * * If you're using pretty much any compiler other than MSVC, - * everything should basically work as well as in C11/C++11. 
- */ - -#if !defined(SIMDE_ALIGN_H) -#define SIMDE_ALIGN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* I know this seems a little silly, but some non-hosted compilers - * don't have stddef.h, so we try to accomodate them. */ -#if !defined(SIMDE_ALIGN_SIZE_T_) - #if defined(__SIZE_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__SIZE_T_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #else - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #endif -#endif - -#if !defined(SIMDE_ALIGN_INTPTR_T_) - #if defined(__INTPTR_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ - #elif defined(__PTRDIFF_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ - #elif defined(__PTRDIFF_T_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #else - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #endif -#endif - -#if defined(SIMDE_ALIGN_DEBUG) - #if defined(__cplusplus) - #include - #else - #include - #endif -#endif - -/* SIMDE_ALIGN_OF(Type) - * - * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or - * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. - * It isn't defined everywhere (only when the compiler has some alignof- - * like feature we can use to implement it), but it should work in most - * modern compilers, as well as C11 and C++11. - * - * If we can't find an implementation for SIMDE_ALIGN_OF then the macro - * will not be defined, so if you can handle that situation sensibly - * you may need to sprinkle some ifdefs into your code. - */ -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (0 && HEDLEY_HAS_FEATURE(c_alignof)) - #define SIMDE_ALIGN_OF(Type) _Alignof(Type) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) - #define SIMDE_ALIGN_OF(Type) alignof(Type) -#elif \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - defined(__IBM__ALIGNOF__) || \ - defined(__clang__) - #define SIMDE_ALIGN_OF(Type) __alignof__(Type) -#elif \ - HEDLEY_IAR_VERSION_CHECK(8,40,0) - #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(19,0,0) - /* Probably goes back much further, but MS takes down their old docs. - * If you can verify that this works in earlier versions please let - * me know! */ - #define SIMDE_ALIGN_OF(Type) __alignof(Type) -#endif - -/* SIMDE_ALIGN_MAXIMUM: - * - * This is the maximum alignment that the compiler supports. You can - * define the value prior to including SIMDe if necessary, but in that - * case *please* submit an issue so we can add the platform to the - * detection code. - * - * Most compilers are okay with types which are aligned beyond what - * they think is the maximum, as long as the alignment is a power - * of two. 
Older versions of MSVC is the exception, so we need to cap - * the alignment requests at values that the implementation supports. - * - * XL C/C++ will accept values larger than 16 (which is the alignment - * of an AltiVec vector), but will not reliably align to the larger - * value, so so we cap the value at 16 there. - * - * If the compiler accepts any power-of-two value within reason then - * this macro should be left undefined, and the SIMDE_ALIGN_CAP - * macro will just return the value passed to it. */ -#if !defined(SIMDE_ALIGN_MAXIMUM) - #if defined(HEDLEY_MSVC_VERSION) - #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) - // Visual studio 2017 and newer does not need a max - #else - #if defined(_M_IX86) || defined(_M_AMD64) - #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 - #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) - /* VS 2010 is really a guess based on Wikipedia; if anyone can - * test with old VS versions I'd really appreciate it. */ - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 - #else - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif - #elif defined(_M_ARM) || defined(_M_ARM64) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 - #endif - #endif - #elif defined(HEDLEY_IBM_VERSION) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif -#endif - -/* You can mostly ignore these; they're intended for internal use. - * If you do need to use them please let me know; if they fulfill - * a common use case I'll probably drop the trailing underscore - * and make them part of the public API. */ -#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) - #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 - #define SIMDE_ALIGN_64_ 32 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 - #define SIMDE_ALIGN_64_ 16 - #define SIMDE_ALIGN_32_ 16 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 - #define SIMDE_ALIGN_64_ 8 - #define SIMDE_ALIGN_32_ 8 - #define SIMDE_ALIGN_16_ 8 - #define SIMDE_ALIGN_8_ 8 - #else - #error Max alignment expected to be >= 8 - #endif -#else - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 -#endif - -/** - * SIMDE_ALIGN_CAP(Alignment) - * - * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. - */ -#if defined(SIMDE_ALIGN_MAXIMUM) - #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) -#else - #define SIMDE_ALIGN_CAP(Alignment) (Alignment) -#endif - -/* SIMDE_ALIGN_TO(Alignment) - * - * SIMDE_ALIGN_TO is used to declare types or variables. It basically - * maps to the align attribute in most compilers, the align declspec - * in MSVC, or _Alignas/alignas in C11/C++11. - * - * Example: - * - * struct i32x4 { - * SIMDE_ALIGN_TO(16) int32_t values[4]; - * } - * - * Limitations: - * - * MSVC requires that the Alignment parameter be numeric; you can't do - * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is - * unfortunate because that's really how the LIKE macros are - * implemented, and I am not aware of a way to get anything like this - * to work without using the C11/C++11 keywords. 
- * - * It also means that we can't use SIMDE_ALIGN_CAP to limit the - * alignment to the value specified, which MSVC also requires, so on - * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. - * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, - * but should be safe to use on MSVC. - * - * All this is to say that, if you want your code to work on MSVC, you - * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of - * SIMDE_ALIGN_TO(8/16/32/64). - */ -#if \ - HEDLEY_HAS_ATTRIBUTE(aligned) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) -#elif \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) - #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) - #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) - /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); - * the alignment passed to the declspec has to be an integer. */ - #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE -#endif -#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) -#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) -#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) -#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) - -/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) - * - * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's - * std::assume_aligned, or __builtin_assume_aligned. It tells the - * compiler to assume that the provided pointer is aligned to an - * `Alignment`-byte boundary. - * - * If you define SIMDE_ALIGN_DEBUG prior to including this header then - * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't - * integrate with NDEBUG in this header, but it may be a good idea to - * put something like this in your code: - * - * #if !defined(NDEBUG) - * #define SIMDE_ALIGN_DEBUG - * #endif - * #include <.../simde-align.h> - */ -#if \ - HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ - HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ - __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ - __assume_aligned(simde_assume_aligned_t_, Alignment); \ - simde_assume_aligned_t_; \ - })) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) -#else - #if defined(__cplusplus) - template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) - #else - HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) - #endif - { - HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); - return ptr; - } - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) - #else - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) - #endif -#endif - -#if !defined(SIMDE_ALIGN_DEBUG) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) -#else - #include - #if defined(__cplusplus) - template - static HEDLEY_ALWAYS_INLINE - T* - simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #else - static HEDLEY_ALWAYS_INLINE - void* - simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #endif - { - if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { - fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", - file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), - HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), - HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); - } - - return ptr; - } - - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) - #else - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) - #endif -#endif - -/* SIMDE_ALIGN_LIKE(Type) - * SIMDE_ALIGN_LIKE_#(Type) - * - * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros - * except instead of an integer they take a type; basically, it's just - * a more convenient way to do something like: - * - * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - * - * The versions with a numeric suffix will fall back 
on using a numeric - * value in the event we can't use SIMDE_ALIGN_OF(Type). This is - * mainly for MSVC, where __declspec(align()) can't handle anything - * other than hard-coded numeric values. - */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) - #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) -#else - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 -#endif - -/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) - * - * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a - * type instead of a numeric value. */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) - #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) -#endif - -/* SIMDE_ALIGN_CAST(Type, Pointer) - * - * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try - * to silence warnings that some compilers may produce if you try - * to assign to a type with increased alignment requirements. - * - * Note that it does *not* actually attempt to tell the compiler that - * the pointer is aligned like the destination should be; that's the - * job of the next macro. This macro is necessary for stupid APIs - * like _mm_loadu_si128 where the input is a __m128i* but the function - * is specifically for data which isn't necessarily aligned to - * _Alignof(__m128i). - */ -#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ - Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_r_; \ - })) -#else - #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) -#endif - -/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) - * - * This is sort of like a combination of a reinterpret_cast and a - * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell - * the compiler that the pointer is aligned like the specified type - * and casts the pointer to the specified type while suppressing any - * warnings from the compiler about casting to a type with greater - * alignment requirements. - */ -#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) - -#endif /* !defined(SIMDE_ALIGN_H) */ -/* :: End simde/simde-align.h :: */ - -/* In some situations, SIMDe has to make large performance sacrifices - * for small increases in how faithfully it reproduces an API, but - * only a relatively small number of users will actually need the API - * to be completely accurate. The SIMDE_FAST_* options can be used to - * disable these trade-offs. - * - * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or - * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to - * enable some optimizations. Using -ffast-math and/or - * -ffinite-math-only will also enable the relevant options. If you - * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) - #define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) - #if defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_NANS - #elif defined(__FINITE_MATH_ONLY__) - #if __FINITE_MATH_ONLY__ - #define SIMDE_FAST_NANS - #endif - #endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_CONVERSION_RANGE -#endif - -/* Due to differences across platforms, sometimes it can be much - * faster for us to allow spurious floating point exceptions, - * or to no generate them when we should. 
*/ -#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_EXCEPTIONS -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) - #if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ - (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) - #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") - #else - #define SIMDE_REQUIRE_CONSTANT(arg) - #endif -#else - #define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) - /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which - * starts with a double-underscore. This is a system header so we have no - * control over it, but since it's a macro it will emit a diagnostic which - * prevents compilation with -Werror. */ - #if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ - _Static_assert(expr, message); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) - #endif -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) - #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#endif - -/* Statement exprs */ -#if \ - HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ - HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) -#endif - -/* This is just a convenience macro to make it easy to call a single - * function with a specific diagnostic disabled. 
*/ -#if defined(SIMDE_STATEMENT_EXPR_) - #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ - SIMDE_STATEMENT_EXPR_(({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - diagnostic \ - (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#endif - -#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) - #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") -#endif - -#if \ - (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) -# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -# define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -# if \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SCALAR -# define SIMDE_VECTOR_SUBSCRIPT -# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -# define SIMDE_VECTOR_SUBSCRIPT -# elif \ - HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# elif HEDLEY_HAS_ATTRIBUTE(vector_size) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SUBSCRIPT -# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) -# define SIMDE_VECTOR_SCALAR -# endif -# endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) - HEDLEY_DIAGNOSTIC_PUSH - /* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -# endif -# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif - -# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#else -# define SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_SAFELEN(l) -# define SIMDE_VECTORIZE_REDUCTION(r) -# define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if defined(SIMDE_NO_INLINE) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#elif defined(SIMDE_CONSTRAINED_COMPILATION) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static -#else -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if \ - HEDLEY_HAS_ATTRIBUTE(unused) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ - -#if defined(_MSC_VER) -# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -# define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -# define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -# define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -# define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# elif defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__s390x__) || defined(__zarch__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. 
*/ -# elif defined(_WIN32) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(sun) || defined(__sun) /* Solaris */ -# include -# if defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__APPLE__) -# include -# if defined(__LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -# include -# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -# include -# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# endif -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) - #define simde_bswap64(v) _byteswap_uint64(v) -#else - SIMDE_FUNCTION_ATTRIBUTES - uint64_t - simde_bswap64(uint64_t v) { - return - ((v & (((uint64_t) 0xff) << 56)) >> 56) | - ((v & (((uint64_t) 0xff) << 48)) >> 40) | - ((v & (((uint64_t) 0xff) << 40)) >> 24) | - ((v & (((uint64_t) 0xff) << 32)) >> 8) | - ((v & (((uint64_t) 0xff) << 24)) << 8) | - ((v & (((uint64_t) 0xff) << 16)) << 24) | - ((v & (((uint64_t) 0xff) << 8)) << 40) | - ((v & (((uint64_t) 0xff) )) << 56); - } -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -# error Unknown byte order; please file a bug -#else -# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -# define simde_endian_bswap64_be(value) simde_bswap64(value) -# define simde_endian_bswap64_le(value) (value) -# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -# define simde_endian_bswap64_be(value) (value) -# define simde_endian_bswap64_le(value) simde_bswap64(value) -# endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. 
*/ - -#if !defined(SIMDE_FLOAT32_TYPE) -# define SIMDE_FLOAT32_TYPE float -# define SIMDE_FLOAT32_C(value) value##f -#else -# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -# define SIMDE_FLOAT64_TYPE double -# define SIMDE_FLOAT64_C(value) value -#else -# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if defined(SIMDE_POLY8_TYPE) -# undef SIMDE_POLY8_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY8_TYPE poly8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value)) -#else -# define SIMDE_POLY8_TYPE uint8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value)) -#endif -typedef SIMDE_POLY8_TYPE simde_poly8; - -#if defined(SIMDE_POLY16_TYPE) -# undef SIMDE_POLY16_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY16_TYPE poly16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value)) -#else -# define SIMDE_POLY16_TYPE uint16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value)) -#endif -typedef SIMDE_POLY16_TYPE simde_poly16; - -#if defined(SIMDE_POLY64_TYPE) -# undef SIMDE_POLY64_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_POLY64_TYPE poly64_t -# define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull)) -#else -# define SIMDE_POLY64_TYPE uint64_t -# define SIMDE_POLY64_C(value) value ## ull -#endif -typedef SIMDE_POLY64_TYPE simde_poly64; - -#if defined(SIMDE_POLY128_TYPE) -# undef SIMDE_POLY128_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) -# define SIMDE_POLY128_TYPE poly128_t -# define SIMDE_POLY128_C(value) value -#elif defined(__SIZEOF_INT128__) -# define SIMDE_POLY128_TYPE __int128 -# define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value)) -#else -# define SIMDE_POLY128_TYPE uint64_t -# define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1 -#endif -typedef SIMDE_POLY128_TYPE simde_poly128; - -#if defined(__cplusplus) - typedef bool simde_bool; -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - typedef _Bool simde_bool; -#elif defined(bool) - typedef bool simde_bool; -#else - #include - typedef bool simde_bool; -#endif - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -# define SIMDE_CONVERT_FTOI(T,v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) \ - HEDLEY_DIAGNOSTIC_POP -#else -# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) -#else - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -# define 
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -# define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -# define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -# if \ - defined(HEDLEY_PGI_VERSION) || \ - defined(HEDLEY_MSVC_VERSION) -# define SIMDE_STDC_HOSTED 1 -# else -# define SIMDE_STDC_HOSTED 0 -# endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) - #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) - #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) - #endif -#endif -#if !defined(simde_memset) - #if HEDLEY_HAS_BUILTIN(__builtin_memset) - #define simde_memset(s, c, n) __builtin_memset(s, c, n) - #endif -#endif -#if !defined(simde_memcmp) - #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) - #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) - #endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) - #if !defined(SIMDE_NO_STRING_H) - #if defined(__has_include) - #if !__has_include() - #define SIMDE_NO_STRING_H - #endif - #elif (SIMDE_STDC_HOSTED == 0) - #define SIMDE_NO_STRING_H - #endif - #endif - - #if !defined(SIMDE_NO_STRING_H) - #include - #if !defined(simde_memcpy) - #define simde_memcpy(dest, src, n) memcpy(dest, src, n) - #endif - #if !defined(simde_memset) - #define simde_memset(s, c, n) memset(s, c, n) - #endif - #if !defined(simde_memcmp) - #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) - #endif - #else - /* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. 
*/ - #if !defined(simde_memcpy) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memcpy_(void* dest, const void* src, size_t len) { - char* dest_ = HEDLEY_STATIC_CAST(char*, dest); - char* src_ = HEDLEY_STATIC_CAST(const char*, src); - for (size_t i = 0 ; i < len ; i++) { - dest_[i] = src_[i]; - } - } - #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) - #endif - - #if !defined(simde_memset) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memset_(void* s, int c, size_t len) { - char* s_ = HEDLEY_STATIC_CAST(char*, s); - char c_ = HEDLEY_STATIC_CAST(char, c); - for (size_t i = 0 ; i < len ; i++) { - s_[i] = c_[i]; - } - } - #define simde_memset(s, c, n) simde_memset_(s, c, n) - #endif - - #if !defined(simde_memcmp) - SIMDE_FUCTION_ATTRIBUTES - int - simde_memcmp_(const void *s1, const void *s2, size_t n) { - unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); - unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); - for (size_t i = 0 ; i < len ; i++) { - if (s1_[i] != s2_[i]) { - return (int) (s1_[i] - s2_[i]); - } - } - return 0; - } - #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) - #endif - #endif -#endif - -/*** Functions that quiet a signaling NaN ***/ - -static HEDLEY_INLINE -double -simde_math_quiet(double x) { - uint64_t tmp, mask; - if (!simde_math_isnan(x)) { - return x; - } - simde_memcpy(&tmp, &x, 8); - mask = 0x7ff80000; - mask <<= 32; - tmp |= mask; - simde_memcpy(&x, &tmp, 8); - return x; -} - -static HEDLEY_INLINE -float -simde_math_quietf(float x) { - uint32_t tmp; - if (!simde_math_isnanf(x)) { - return x; - } - simde_memcpy(&tmp, &x, 4); - tmp |= 0x7fc00000lu; - simde_memcpy(&x, &tmp, 4); - return x; -} - -#if defined(FE_ALL_EXCEPT) - #define SIMDE_HAVE_FENV_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_FENV_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_FENV_H -#endif - -#if defined(EXIT_FAILURE) - #define SIMDE_HAVE_STDLIB_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_STDLIB_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_STDLIB_H -#endif - -#if defined(__has_include) -# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -# include -# elif __has_include() -# include -# endif -# if __has_include() -# include -# endif -#elif SIMDE_STDC_HOSTED == 1 -# include -# include -#endif - -#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ - static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ - T_To \ - Name (T_From value) { \ - T_To r; \ - simde_memcpy(&r, &value, sizeof(r)); \ - return r; \ - } - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/check.h :: */ -/* Check (assertions) - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -#if !defined(_WIN32) -# define SIMDE_SIZE_MODIFIER "z" -# define SIMDE_CHAR_MODIFIER "hh" -# define SIMDE_SHORT_MODIFIER "h" -#else -# if defined(_M_X64) || defined(__amd64__) -# define SIMDE_SIZE_MODIFIER "I64" -# else -# define SIMDE_SIZE_MODIFIER "" -# endif -# define SIMDE_CHAR_MODIFIER "" -# define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) -# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ -# define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -# if defined(__has_include) -# if __has_include() -# include -# endif -# elif defined(SIMDE_STDC_HOSTED) -# if SIMDE_STDC_HOSTED == 1 -# include -# endif -# elif defined(__STDC_HOSTED__) -# if __STDC_HOSTETD__ == 1 -# include -# endif -# endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/debug-trap.h :: */ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define simde_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define simde_trap() __debugbreak() -# endif -#endif -#if !defined(simde_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define simde_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define simde_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define simde_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void simde_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define simde_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define simde_trap() raise(SIGTRAP) -# else -# define simde_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -# define simde_dbg_assert(expr) do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -# define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ -/* :: End simde/debug-trap.h :: */ - - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -# if defined(EOF) -# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) -# else -# define simde_errorf(format, ...) (simde_trap()) -# endif - HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -# if defined(SIMDE_CHECK_FAIL_DEFINED) -# define simde_assert(expr) -# else -# if defined(HEDLEY_ASSUME) -# define simde_assert(expr) HEDLEY_ASSUME(expr) -# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) -# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) -# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) -# define simde_assert(expr) __assume(expr) -# else -# define simde_assert(expr) -# endif -# endif -# define simde_assert_true(expr) simde_assert(expr) -# define simde_assert_false(expr) simde_assert(!(expr)) -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) -# define simde_assert_double_equal(a, b, precision) -# define simde_assert_string_equal(a, b) -# define simde_assert_string_not_equal(a, b) -# define simde_assert_memory_equal(size, a, b) -# define simde_assert_memory_not_equal(size, a, b) -#else -# define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ - -(simde_tmp_a_ - simde_tmp_b_) : \ - (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# include -# define simde_assert_string_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ - simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_string_not_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ - simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ - simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) \ - simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) \ - simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) \ - simde_assert_type(float, "f", a, op, b) 
-#define simde_assert_double(a, op, b) \ - simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void*, "p", a, op, b) - -#define simde_assert_int8(a, op, b) \ - simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) \ - simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) \ - simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) \ - simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ -/* :: End simde/check.h :: */ - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether the operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * we use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long lonsg are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long. 
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
[Elided: continuation of the deleted vendored SIMDE sources (every hunk in this span is a removal). The deleted content comprises the remainder of the inlined simde/simde-common.h — compiler bug-workaround macros (SIMDE_BUG_CLANG_45931/44589/48673/45959/60655, SIMDE_BUG_MSVC_ROUND_EXTRACT, SIMDE_BUG_INTEL_857088, the MCST LCC and PGI workarounds), SIMDE_BUG_IGNORE_SIGN_CONVERSION, and SIMDE_CAST_VECTOR_SHIFT_COUNT — followed by the inlined simde/x86/mmx.h MMX polyfill: the simde__m64 / simde__m64_private types with their native-type conversion helpers, and the portable simde_mm_* implementations (with x86 MMX, ARM NEON, Loongson MMI, and scalar fallbacks) of add/adds, and/andnot, cmpeq/cmpgt, cvtm64/cvtsi, empty, madd, mulhi/mullo, or, packs/packs_pu, set/set1/setr, setzero, load/store, the sll/srl/sra shift families (register and immediate forms), sub/subs, and unpackhi/unpacklo. The deletion of this vendored header continues beyond this span.]
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); - #else - r_.i16[0] = a_.i16[0]; - r_.i16[1] = b_.i16[0]; - r_.i16[2] = a_.i16[1]; - r_.i16[3] = b_.i16[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) -# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); - #else - r_.i32[0] = a_.i32[0]; - r_.i32[1] = b_.i32[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) -# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_xor_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - r_.u64[0] = a_.u64[0] ^ b_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) -# define _m_pxor(a, b) simde_mm_xor_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_m_to_int (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _m_to_int(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _m_to_int(a) simde_m_to_int(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_MMX_H) */ -/* :: End simde/x86/mmx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-f16.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do 
so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if !defined(SIMDE_FLOAT16_H) -#define SIMDE_FLOAT16_H - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* Portable version which should work on pretty much any compiler. - * Obviously you can't rely on compiler support for things like - * conversion to/from 32-bit floats, so make sure you always use the - * functions and macros in this file! - * - * The portable implementations are (heavily) based on CC0 code by - * Fabian Giesen: (see also - * ). - * I have basically just modified it to get rid of some UB (lots of - * aliasing, right shifting a negative value), use fixed-width types, - * and work in C. */ -#define SIMDE_FLOAT16_API_PORTABLE 1 -/* _Float16, per C standard (TS 18661-3; - * ). */ -#define SIMDE_FLOAT16_API_FLOAT16 2 -/* clang >= 6.0 supports __fp16 as an interchange format on all - * targets, but only allows you to use them for arguments and return - * values on targets which have defined an ABI. We get around the - * restriction by wrapping the __fp16 in a struct, but we can't do - * that on Arm since it would break compatibility with the NEON F16 - * functions. */ -#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 -/* This is basically __fp16 as specified by Arm, where arugments and - * return values are raw __fp16 values not structs. */ -#define SIMDE_FLOAT16_API_FP16 4 - -/* Choosing an implementation. This is a bit rough, but I don't have - * any ideas on how to improve it. If you do, patches are definitely - * welcome. */ -#if !defined(SIMDE_FLOAT16_API) - #if defined(__ARM_FP16_FORMAT_IEEE) && (defined(SIMDE_ARM_NEON_FP16) || defined(__ARM_FP16_ARGS)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 - #elif !defined(__EMSCRIPTEN__) && !(defined(__clang__) && defined(SIMDE_ARCH_POWER)) && \ - !(defined(HEDLEY_MSVC_VERSION) && defined(__clang__)) && \ - !(defined(SIMDE_ARCH_MIPS) && defined(__clang__)) && \ - !(defined(__clang__) && defined(SIMDE_ARCH_RISCV64)) && ( \ - defined(SIMDE_X86_AVX512FP16_NATIVE) || \ - (defined(SIMDE_ARCH_X86_SSE2) && HEDLEY_GCC_VERSION_CHECK(12,0,0)) || \ - (defined(SIMDE_ARCH_AARCH64) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !defined(__cplusplus)) || \ - ((defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0)) || \ - (!(defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(6,0,0))) - /* We haven't found a better way to detect this. 
It seems like defining - * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then - * checking for defined(FLT16_MAX) should work, but both gcc and - * clang will define the constants even if _Float16 is not - * supported. Ideas welcome. */ - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 - #elif defined(__FLT16_MIN__) && \ - (defined(__clang__) && \ - (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) \ - && !defined(SIMDE_ARCH_RISCV64)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI - #else - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE - #endif -#endif - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 - typedef _Float16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #if !defined(__cplusplus) - #define SIMDE_FLOAT16_C(value) value##f16 - #else - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(_Float16, (value)) - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - typedef struct { __fp16 value; } simde_float16; - #if defined(SIMDE_STATEMENT_EXPR_) && !defined(SIMDE_TESTS_H) - #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) - #else - #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) - #define SIMDE_FLOAT16_IS_SCALAR 1 - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 - typedef __fp16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - typedef struct { uint16_t value; } simde_float16; -#else - #error No 16-bit floating point API. -#endif - -#if \ - defined(SIMDE_VECTOR_OPS) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) - #define SIMDE_FLOAT16_VECTOR -#endif - -/* Reinterpret -- you *generally* shouldn't need these, they're really - * intended for internal use. However, on x86 half-precision floats - * get stuffed into a __m128i/__m256i, so it may be useful. 
*/ - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - #define SIMDE_NANHF simde_uint16_as_float16(0x7E00) // a quiet Not-a-Number - #define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) - #define SIMDE_NINFINITYHF simde_uint16_as_float16(0xFC00) -#else - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF SIMDE_FLOAT16_C(__builtin_nanf16("")) - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_FLOAT16_C(SIMDE_MATH_NAN) - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(__builtin_inf16()) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-__builtin_inf16()) - #else - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-SIMDE_MATH_INFINITY) - #endif - #else - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF __builtin_nanf16("") - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_MATH_NAN - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF __builtin_inf16() - #define SIMDE_NINFINITYHF -(__builtin_inf16()) - #else - #define SIMDE_INFINITYHF HEDLEY_STATIC_CAST(simde_float16, SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF HEDLEY_STATIC_CAST(simde_float16, -SIMDE_MATH_INFINITY) - #endif - #endif -#endif - -/* Conversion -- convert between single-precision and half-precision - * floats. */ -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float16 -simde_float16_from_float32 (simde_float32 value) { - simde_float16 res; - - #if \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) - res = HEDLEY_STATIC_CAST(simde_float16, value); - #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) - res.value = HEDLEY_STATIC_CAST(__fp16, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint32_t f32u = simde_float32_as_uint32(value); - static const uint32_t f32u_infty = UINT32_C(255) << 23; - static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; - static const uint32_t denorm_magic = - ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; - uint16_t f16u; - - uint32_t sign = f32u & (UINT32_C(1) << 31); - f32u ^= sign; - - /* NOTE all the integer compares in this function cast the operands - * to signed values to help compilers vectorize to SSE2, which lacks - * unsigned comparison instructions. This is fine since all - * operands are below 0x80000000 (we clear the sign bit). */ - - if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ - f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ - } else { /* (De)normalized number or zero */ - if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ - /* use a magic value to align our 10 mantissa bits at the bottom of - * the float. as long as FP addition is round-to-nearest-even this - * just works. */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); - - /* and one integer subtract of the bias later, we have our final float! 
*/ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); - } else { - uint32_t mant_odd = (f32u >> 13) & 1; - - /* update exponent, rounding bias part 1 */ - f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); - /* rounding bias part 2 */ - f32u += mant_odd; - /* take the bits! */ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); - } - } - - f16u |= sign >> 16; - res = simde_uint16_as_float16(f16u); - #endif - - return res; -} - -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float32 -simde_float16_to_float32 (simde_float16 value) { - simde_float32 res; - - #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) - res = HEDLEY_STATIC_CAST(simde_float32, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint16_t half = simde_float16_as_uint16(value); - const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); - const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ - uint32_t f32u; - - f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ - uint32_t exp = shifted_exp & f32u; /* just the exponent */ - f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ - - /* handle exponent special cases */ - if (exp == shifted_exp) /* Inf/NaN? */ - f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ - else if (exp == 0) { /* Zero/Denormal? */ - f32u += (1) << 23; /* extra exp adjust */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ - } - - f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ - res = simde_uint32_as_float32(f32u); - #endif - - return res; -} - -#ifdef SIMDE_FLOAT16_C - #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) -#else - #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) -#endif - -#if !defined(simde_isinfhf) && defined(simde_math_isinff) - #define simde_isinfhf(a) simde_math_isinff(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnanhf) && defined(simde_math_isnanf) - #define simde_isnanhf(a) simde_math_isnanf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnormalhf) && defined(simde_math_isnormalf) - #define simde_isnormalhf(a) simde_math_isnormalf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_issubnormalhf) && defined(simde_math_issubnormalf) - #define simde_issubnormalhf(a) simde_math_issubnormalf(simde_float16_to_float32(a)) -#endif - -#define simde_fpclassifyhf(a) simde_math_fpclassifyf(simde_float16_to_float32(a)) - -static HEDLEY_INLINE -uint8_t -simde_fpclasshf(simde_float16 v, const int imm8) { - uint16_t bits = simde_float16_as_uint16(v); - uint8_t negative = (bits >> 15) & 1; - uint16_t const ExpMask = 0x7C00; // [14:10] - uint16_t const MantMask = 0x03FF; // [9:0] - uint8_t exponent_all_ones = ((bits & ExpMask) == ExpMask); - uint8_t exponent_all_zeros = ((bits & ExpMask) == 0); - uint8_t mantissa_all_zeros = ((bits & MantMask) == 0); - uint8_t zero = exponent_all_zeros & mantissa_all_zeros; - uint8_t signaling_bit = (bits >> 9) & 1; - - uint8_t result = 0; - uint8_t snan = exponent_all_ones & (!mantissa_all_zeros) & (!signaling_bit); - uint8_t qnan = exponent_all_ones & (!mantissa_all_zeros) & signaling_bit; - uint8_t positive_zero = (!negative) & zero; - uint8_t negative_zero = negative & zero; - uint8_t positive_infinity = (!negative) & exponent_all_ones & mantissa_all_zeros; - uint8_t negative_infinity = negative & exponent_all_ones & mantissa_all_zeros; - uint8_t 
denormal = exponent_all_zeros & (!mantissa_all_zeros); - uint8_t finite_negative = negative & (!exponent_all_ones) & (!zero); - result = (((imm8 >> 0) & qnan) | \ - ((imm8 >> 1) & positive_zero) | \ - ((imm8 >> 2) & negative_zero) | \ - ((imm8 >> 3) & positive_infinity) | \ - ((imm8 >> 4) & negative_infinity) | \ - ((imm8 >> 5) & denormal) | \ - ((imm8 >> 6) & finite_negative) | \ - ((imm8 >> 7) & snan)); - return result; -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_FLOAT16_H) */ -/* :: End simde/simde-f16.h :: */ - -#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) - #define NOMINMAX - #include -#endif - -#if defined(__ARM_ACLE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_ALIGN_TO_16 __m128 n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v16i8 lsx_i8; - v8i16 lsx_i16; - v4i32 lsx_i32; - v2i64 lsx_i64; - v16u8 lsx_u8; - v8u16 lsx_u16; - v4u32 lsx_u32; - v2u64 lsx_u64; - v4f32 lsx_f32; - v2f64 lsx_f64; - #endif -} simde__m128_private; - -#if defined(SIMDE_X86_SSE_NATIVE) - typedef __m128 simde__m128; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef float32x4_t simde__m128; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; -#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - typedef v4f32 simde__m128; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128_private simde__m128; -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - typedef simde__m128 __m128; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde__m128_from_private(simde__m128_private v) { - simde__m128 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128_private -simde__m128_to_private(simde__m128 v) { - simde__m128_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(float) - simde__m128_to_altivec_f32(simde__m128 value) { - simde__m128_private r_ = simde__m128_to_private(value); - return r_.altivec_f32; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { - simde__m128_private r_; - r_.altivec_f32 = value; - return simde__m128_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); -#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ - -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) -#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ - -enum { - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, - SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, - SIMDE_MM_ROUND_UP = _MM_ROUND_UP, - SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO - #else - SIMDE_MM_ROUND_NEAREST = 0x0000, - SIMDE_MM_ROUND_DOWN = 0x2000, - SIMDE_MM_ROUND_UP = 0x4000, - SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 - #endif -}; -#if defined(_MM_ROUND_MASK) -# define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK -#else -# define SIMDE_MM_ROUND_MASK (0x6000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK -#endif - -#if defined(_MM_FROUND_TO_NEAREST_INT) -# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT -# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF -# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF -# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO -# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION - -# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC -# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC -#else -# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 -# define 
SIMDE_MM_FROUND_TO_NEG_INF 0x01 -# define SIMDE_MM_FROUND_TO_POS_INF 0x02 -# define SIMDE_MM_FROUND_TO_ZERO 0x03 -# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 - -# define SIMDE_MM_FROUND_RAISE_EXC 0x00 -# define SIMDE_MM_FROUND_NO_EXC 0x08 -#endif - -#define SIMDE_MM_FROUND_NINT \ - (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_FLOOR \ - (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_CEIL \ - (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_TRUNC \ - (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_RINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_NEARBYINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) - -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) -# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT -# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF -# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF -# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO -# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION -# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC -# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT -# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR -# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL -# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC -# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT -# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT -#endif - -#if defined(_MM_EXCEPT_INVALID) -# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID -#else -# define SIMDE_MM_EXCEPT_INVALID (0x0001) -#endif -#if defined(_MM_EXCEPT_DENORM) -# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM -#else -# define SIMDE_MM_EXCEPT_DENORM (0x0002) -#endif -#if defined(_MM_EXCEPT_DIV_ZERO) -# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO -#else -# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) -#endif -#if defined(_MM_EXCEPT_OVERFLOW) -# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW -#else -# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) -#endif -#if defined(_MM_EXCEPT_UNDERFLOW) -# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW -#else -# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) -#endif -#if defined(_MM_EXCEPT_INEXACT) -# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT -#else -# define SIMDE_MM_EXCEPT_INEXACT (0x0020) -#endif -#if defined(_MM_EXCEPT_MASK) -# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK -#else -# define SIMDE_MM_EXCEPT_MASK \ - (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ - SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ - SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID - #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM - #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO - #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW - #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW - #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT - #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK -#endif - -#if defined(_MM_MASK_INVALID) -# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID -#else -# define SIMDE_MM_MASK_INVALID (0x0080) -#endif -#if defined(_MM_MASK_DENORM) -# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM -#else -# define SIMDE_MM_MASK_DENORM (0x0100) -#endif -#if defined(_MM_MASK_DIV_ZERO) -# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO -#else -# define 
SIMDE_MM_MASK_DIV_ZERO (0x0200) -#endif -#if defined(_MM_MASK_OVERFLOW) -# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW -#else -# define SIMDE_MM_MASK_OVERFLOW (0x0400) -#endif -#if defined(_MM_MASK_UNDERFLOW) -# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW -#else -# define SIMDE_MM_MASK_UNDERFLOW (0x0800) -#endif -#if defined(_MM_MASK_INEXACT) -# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT -#else -# define SIMDE_MM_MASK_INEXACT (0x1000) -#endif -#if defined(_MM_MASK_MASK) -# define SIMDE_MM_MASK_MASK _MM_MASK_MASK -#else -# define SIMDE_MM_MASK_MASK \ - (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ - SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ - SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID - #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM - #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO - #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW - #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW - #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT - #define _MM_MASK_MASK SIMDE_MM_MASK_MASK -#endif - -#if defined(_MM_FLUSH_ZERO_MASK) -# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK -#else -# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_ON) -# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON -#else -# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_OFF) -# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF -#else -# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK - #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON - #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_ROUNDING_MODE(void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _MM_GET_ROUNDING_MODE(); - #elif defined(SIMDE_HAVE_FENV_H) - unsigned int vfe_mode; - - switch (fegetround()) { - #if defined(FE_TONEAREST) - case FE_TONEAREST: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case FE_TOWARDZERO: - vfe_mode = SIMDE_MM_ROUND_DOWN; - break; - #endif - - #if defined(FE_UPWARD) - case FE_UPWARD: - vfe_mode = SIMDE_MM_ROUND_UP; - break; - #endif - - #if defined(FE_DOWNWARD) - case FE_DOWNWARD: - vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; - break; - #endif - - default: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - } - - return vfe_mode; - #else - return SIMDE_MM_ROUND_NEAREST; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_ROUNDING_MODE(a); - #elif defined(SIMDE_HAVE_FENV_H) - int fe_mode = FE_TONEAREST; - - switch (a) { - #if defined(FE_TONEAREST) - case SIMDE_MM_ROUND_NEAREST: - fe_mode = FE_TONEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case SIMDE_MM_ROUND_TOWARD_ZERO: - fe_mode = FE_TOWARDZERO; - break; - #endif - - #if defined(FE_DOWNWARD) - case SIMDE_MM_ROUND_DOWN: - fe_mode = FE_DOWNWARD; - break; - #endif - - #if defined(FE_UPWARD) - case SIMDE_MM_ROUND_UP: - fe_mode = FE_UPWARD; - break; - #endif - - default: - return; - } - - fesetround(fe_mode); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. 
*/ - #if \ - defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_f32 = vrndiq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndnq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndmq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); - #elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndpq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); - #elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndq_f32(a_.neon_f32); 
- #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); - #elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) -#else - #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps(e3, e2, e1, e0); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; - r_.neon_f32 = vld1q_f32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps1 (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps1(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - (void) a; - return vec_splats(a); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - return (simde__m128)__lsx_vldrepl_w(&a, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_splat(a); - #else - return simde_mm_set_ps(a, a, a, a); - #endif -} -#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps1(a) simde_mm_set_ps1(a) -# define _mm_set1_ps(a) simde_mm_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_move_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_move_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; - r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); - #else - r_.f32[0] = b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_broadcastlow_ps(simde__m128 a) { - /* This function broadcasts the first element in the inpu vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_ss functions since there may be garbage in the upper lanes. */ - - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_shuffle_ps(a, a, 0); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - // the upper values in the result must be the remnants of . 
[Deletion of the vendored SIMDe x86/SSE compatibility header continues here. The removed lines are the upstream SIMDe emulation layer for the `_mm_*` SSE intrinsics: logical ops (simde_mm_and_ps, simde_mm_andnot_ps, simde_mm_or_ps, simde_mm_xor_ps), packed and scalar comparisons (simde_mm_cmp*_ps/ss, simde_mm_comi*_ss, simde_mm_cmpord/cmpunord), conversions (simde_mm_cvt*_ps/pi/ss/si), loads (simde_mm_load_ps, simde_mm_load1_ps, simde_mm_load_ss, simde_mm_loadh_pi, simde_mm_loadl_pi, simde_mm_loadr_ps, simde_mm_loadu_ps), and arithmetic/min-max helpers (simde_mm_div_ps/ss, simde_mm_avg_pu8/pu16, simde_mm_max_pi16/ps/pu8), each implemented with native SSE, NEON, WASM SIMD, AltiVec, LoongArch LSX, and scalar fallback paths plus the matching `_mm_*` alias macros. This is verbatim upstream SIMDe code, not package-specific, and it is removed wholesale together with the rest of the vendored simde sources.]
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) -# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) -# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - #if defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); - #else - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); - #endif - #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); - r_.f32 = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.f32), - ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | - (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) - ) - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) -# define _m_pminub(a, b) simde_mm_min_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movehl_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vzip2q_u64(b_.neon_u64, a_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a32 = vget_high_f32(a_.neon_f32); - float32x2_t b32 = vget_high_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(b32, a32); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergel(b_.altivec_i64, a_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); - #else - r_.f32[0] = b_.f32[2]; - r_.f32[1] = b_.f32[3]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movelh_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = 
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
-/* ... remainder of the bundled SIMDe polyfill sources deleted with this change:
-   the tail of simde/x86/sse.h (the _mm_ucomi*_ss comparisons, _mm_unpacklo/hi_ps,
-   _mm_stream_pi/_mm_stream_ps and _MM_TRANSPOSE4_PS polyfills) and the inlined
-   SSE2 section (the simde__m128i/simde__m128d unions and typedefs, conversion
-   helpers, and the _mm_add_*, _mm_adds_*, _mm_and_*/_mm_andnot_*/_mm_xor_pd,
-   _mm_avg_*, _mm_bslli/bsrli_si128, _mm_cast*, _mm_comi*_sd and _mm_cmpeq_*
-   polyfills with their NEON, WASM SIMD128, AltiVec/z/Vector and LoongArch LSX
-   backends), none of which are needed after the Armadillo port of sdpr_mcmc.cpp ... */
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_MIPS_MSA_NATIVE) - r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpgt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpge_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpge_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpngt_pd(a, b); - #else - return simde_mm_cmple_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpngt_sd(a, b); - #else - return simde_mm_cmple_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnge_pd(a, b); - #else - return simde_mm_cmplt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpnge_sd(a, b); - #else - return simde_mm_cmplt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_pd(a, b); - #else - return simde_mm_cmpge_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_sd(a, b); - #else - return simde_mm_cmpge_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnle_pd(a, b); - #else - return simde_mm_cmpgt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return 
_mm_cmpnle_sd(a, b); - #else - return simde_mm_cmpgt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vandq_u64(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm_cvtsd_f64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cvtsd_f64(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); - #else - return a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_pd(a); - #else - simde__m128d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtepi32_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_ps(a); - #else - simde__m128_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #pragma clang diagnostic ignored "-Wc11-extensions" - #endif - r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = simde_math_round(a_.f64[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) - return _mm_cvtpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvtpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) - float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; - r_.f32 = - __builtin_shufflevector( - __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, - 0, 1, 2, 3 - ); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); - r_.f32[2] = SIMDE_FLOAT32_C(0.0); - r_.f32[3] = SIMDE_FLOAT32_C(0.0); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtpi32_pd (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_pd(a); - #else - simde__m128d_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) - a_ = simde__m128_to_private(a); - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - #else - a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_pd(a); - #else - simde__m128d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 v = simde_math_round(a_.f64[0]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsd_si64x(a); - #else - return _mm_cvtsd_si64(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); - #endif -} -#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) - #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); - - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i]; - } - #endif - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_x_mm_cvtsi128_si16 (simde__m128i a) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s16(a_.neon_i16, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i16, 0); - #else - return a_.i16[0]; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi128_si32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi128_si32(a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s32(a_.neon_i32, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i32, 0); - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsi128_si64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsi128_si64x(a); - #else - return _mm_cvtsi128_si64(a); - #endif - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); - #endif - return a_.i64[0]; - #endif -} -#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) - #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_sd(a, b); - #else - simde__m128d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.i64[1] = a_.i64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cvtsi16_si128 (int16_t a) { - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); - #else - r_.i16[0] = a; - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - r_.i16[4] = 0; - r_.i16[5] = 0; - r_.i16[6] = 0; - r_.i16[7] = 0; - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi32_si128 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_si128(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_sd(a, b); - #else - return _mm_cvtsi64x_sd(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) - #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi64_si128 (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_si128(a); - #else - return _mm_cvtsi64x_si128(a); - #endif - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i64x2_make(a, 0); - #else - r_.i64[0] = a; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) - #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtss_sd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); - return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); - - return simde__m128d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvttpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvttpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = a_.f64[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvttpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - /* Values below INT32_MIN saturate anyways, so we don't need to - * test for that. 
*/ - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = - vandq_u32( - vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), - vceqq_f32(a_.neon_f32, a_.neon_f32) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); - #endif - - r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - v128_t valid_input = - wasm_v128_and( - wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), - wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); - #endif - - r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); - #endif - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; - - __typeof__(r_.i32) valid_input = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.i32), - (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) - ); - #elif !defined(SIMDE_FAST_NANS) - __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); - #endif - - __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; - r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); - #endif - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvttsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
[... remainder of the deleted vendored SIMDE SSE2 emulation header elided; the removed hunk continues with the portable simde_mm_* wrappers for the SSE2 intrinsics (cvttsd_si32/si64, div_pd/sd, extract/insert_epi16, the load/load1/loadl/loadh/loadr/loadu family, madd_epi16, maskmoveu_si128, movemask_epi8/pd, min/max_epi16/epu8/pd/sd, move_epi64, mul_epu32/pd/sd, mulhi/mullo_epi16, or_pd/si128, packs/packus, pause, sad_epu8, and the set/set1/setr families), each with its NEON, AltiVec/VSX, WASM SIMD128, and scalar fallbacks ...]
#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi32(e3, e2, e1, e0); - #else - return simde_mm_set_epi32(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_epi64(e1, e0); - #else - return simde_mm_set_epi64(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_pd(e1, e0); - #else - return simde_mm_set_pd(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setzero_pd (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_pd(); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); - #else - return simde_mm_castsi128_pd(simde_mm_setzero_si128()); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_pd() simde_mm_setzero_pd() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_undefined_pd (void) { - simde__m128d_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_pd(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_pd() simde_mm_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_undefined_si128 (void) { - simde__m128i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_si128(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_si128() (simde_mm_undefined_si128()) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_setone_pd (void) { - return simde_mm_castps_pd(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_setone_si128 (void) { - return simde_mm_castps_si128(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = 
simde__m128i_to_private(a); \ - simde__m128i_from_wasm_v128( \ - wasm_i32x4_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3)); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_epi32(a, imm8) \ - (__extension__ ({ \ - const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ - int32x4_t simde_mm_shuffle_epi32_r_; \ - simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ - vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - (simde_tmp_a_).i32, \ - (simde_tmp_a_).i32, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; - r_.f64[1] = ((imm8 & 2) == 0) ? 
b_.f64[0] : b_.f64[1]; - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ - simde__m128d_from_private((simde__m128d_private) { .f64 = \ - SIMDE_SHUFFLE_VECTOR_(64, 16, \ - simde__m128d_to_private(a).f64, \ - simde__m128d_to_private(b).f64, \ - (((imm8) ) & 1), \ - (((imm8) >> 1) & 1) + 2) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[i]; - } - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflehi_epi16(a, imm8) \ - (__extension__ ({ \ - int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ - simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ - wasm_i16x8_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = 
simde__m128i_to_private(a); - - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; - } - SIMDE_VECTORIZE - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) \ - simde__m128i_from_wasm_v128( \ - wasm_i16x8_shuffle( \ - simde__m128i_to_wasm_v128((a)), \ - wasm_i16x8_splat(0), \ - (((imm8) & 0x03) ), \ - (((imm8) & 0x0c) >> 2), \ - (((imm8) & 0x30) >> 4), \ - (((imm8) & 0xc0) >> 6), \ - 4, 5, 6, 7)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflelo_epi16(a, imm8) \ - (__extension__({ \ - int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ - simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), \ - 4, 5, 6, 7) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 15) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = (a_.u16 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? 
wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 31) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = (a_.u32 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 63) - return simde_mm_setzero_si128(); - - const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] << s; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsqrtq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_sqrt(a_.altivec_f64); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_sqrt) - r_.f64[0] = simde_math_sqrt(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~15) ? 15 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~31) ? 
31 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sra_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) - return _mm_sra_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - (((imm8) <= 0) ? 
\ - (a) : \ - simde__m128i_from_neon_i16( \ - ((imm8) > 15) ? \ - vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ - vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i32( \ - ((imm8) > 31) ? \ - vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ - vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sl(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 63))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i64( \ - ((imm8) > 63) ? \ - vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ - vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u16( \ - ((imm8) > 15) ? \ - vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ - vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u32( \ - ((imm8) > 31) ? \ - vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ - vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sr(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); - #else - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u64( \ - ((imm8) > 63) ? \ - vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ - vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store1_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); - #else - mem_addr[0] = a_.f64[0]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) - #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_sd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); - simde_memcpy(mem_addr, &v, sizeof(v)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); - simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde_float64 v = a_.f64[0]; - simde_memcpy(mem_addr, &v, sizeof(simde_float64)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void - simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeh_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - *mem_addr = a_.f64[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int64_t tmp; - - /* memcpy to prevent aliasing, tmp because we can't take the - * address of a vector element. */ - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - tmp = vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - tmp = vec_extract(a_.altivec_i64, 0); - #else - tmp = a_.i64[0]; - #endif - - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_pd(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 tmp; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - tmp = vgetq_lane_f64(a_.neon_f64, 0); - #else - tmp = a_.f64[0]; - #endif - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storer_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #else - mem_addr[0] = a_.f64[1]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si16(mem_addr, a); - #else - int16_t val = simde_x_mm_cvtsi128_si16(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si32(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); - #else - int32_t val = simde_mm_cvtsi128_si32(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si64(mem_addr, a); - #else - int64_t val = simde_mm_cvtsi128_si64(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_pd(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_si128(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-void -simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_si32(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_s32(mem_addr, vdupq_n_s32(a), 0); - #else - *mem_addr = a; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) - _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s64(mem_addr, vdup_n_s64(a)); - #else - *mem_addr = a; - #endif -} -#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) - #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] - b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m64 -simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] - b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] == b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] == b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] >= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] >= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > 
wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] > b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] > b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] <= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] <= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] < b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] < b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] != b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] != b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_lfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_lfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_lfence() simde_mm_lfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_mfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_mfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mfence() simde_mm_mfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_high_s16(a_.neon_i16); - int16x4_t b1 = vget_high_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi16(a, b) 
simde_mm_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_high_s32(a_.neon_i32); - int32x2_t b1 = vget_high_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_h = vget_high_s64(a_.neon_i64); - int64x1_t b_h = vget_high_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_h, b_h); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi8 (simde__m128i a, 
simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i]; - r_.i8[(i * 2) + 1] = b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_low_s16(a_.neon_i16); - int16x4_t b1 = vget_low_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i]; - r_.i16[(i * 2) + 1] = b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_low_s32(a_.neon_i32); - int32x2_t b1 = vget_low_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i]; - r_.i32[(i * 2) + 1] = b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_l = vget_low_s64(a_.neon_i64); - int64x1_t b_l = vget_low_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_l, b_l); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i]; - r_.i64[(i * 2) + 1] = b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i]; - r_.f64[(i * 2) + 1] = b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_negate_pd(simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - r_.altivec_f64 = vec_neg(a_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vnegq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); - #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_not_si128 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_ternarylogic_epi32(a, a, a, 0x55); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/sse2.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i]; - r_.i16[i + halfway_point] = b_.i16[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i + 1]; - r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi32 
(simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i]; - r_.i32[i + halfway_point] = b_.i32[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i + 1]; - r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i]; - r_.f32[i + halfway_point] = b_.f32[2 * i]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i + 1]; - r_.f32[i + halfway_point] = 
b_.f32[2 * i + 1]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i]; - r_.f64[i + halfway_point] = b_.f64[2 * i]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i + 1]; - r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); - float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); - return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); - #else - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; - r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); - float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); - return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); - #else - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; - r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; - } - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_ps(a, b) simde_mm_addsub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_pd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); - #else - return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); - #else - return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_pd(a, b); - #else - return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); - #else - return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_lddqu_si128(mem_addr); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loaddup_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_loaddup_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(*mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.f64[1] = *mem_addr; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_movedup_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movedup_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - r_.f64[0] = a_.f64[0]; - r_.f64[1] = a_.f64[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehdup_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movehdup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); - #else - r_.f32[0] = a_.f32[1]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_moveldup_ps (simde__m128 a) { - #if defined(SIMDE__SSE3_NATIVE) - return _mm_moveldup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[0]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[2]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE3_H) */ -/* :: End simde/x86/sse3.h :: */ diff --git a/src/simde/x86/sse4.1.h b/src/simde/x86/sse4.1.h deleted file mode 100644 index 1882da375..000000000 --- a/src/simde/x86/sse4.1.h +++ /dev/null @@ -1,27764 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the 
following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - */ - -#if !defined(SIMDE_X86_SSE_H) -#define SIMDE_X86_SSE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/mmx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_MMX_H) -#define SIMDE_X86_MMX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-common.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_COMMON_H) -#define SIMDE_COMMON_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/hedley.h :: */ -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . 
- * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) -#if defined(HEDLEY_VERSION) -# undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 16 - -#if defined(HEDLEY_STRINGIFY_EX) -# undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -# undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -# undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(HEDLEY_CONCAT) -# undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) - -#if defined(HEDLEY_CONCAT3_EX) -# undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(HEDLEY_CONCAT3) -# undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(HEDLEY_VERSION_ENCODE) -# undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -# undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -# undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -# undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -# undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -# undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -# undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -# undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(HEDLEY_INTEL_VERSION) -# undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -# undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -# undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) -# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -# undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -# undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -# undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -# undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -# undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_ARM_VERSION) -# undef HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(HEDLEY_ARM_VERSION_CHECK) -# undef HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(HEDLEY_ARM_VERSION) -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IBM_VERSION) -# undef HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(HEDLEY_IBM_VERSION_CHECK) -# undef HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(HEDLEY_IBM_VERSION) -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_VERSION) -# undef HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -# if (__TI_COMPILER_VERSION__ >= 16000000) -# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -# endif -#endif - -#if defined(HEDLEY_TI_VERSION_CHECK) -# undef HEDLEY_TI_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_VERSION) -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION) -# undef HEDLEY_TI_CL2000_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) -# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) -# undef HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL2000_VERSION) -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION) -# undef HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) -# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
-# undef HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL430_VERSION) -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION) -# undef HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) -# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) -# undef HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_ARMCL_VERSION) -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION) -# undef HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) -# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) -# undef HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL6X_VERSION) -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION) -# undef HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) -# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) -# undef HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL7X_VERSION) -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION) -# undef HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) -# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) -# undef HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CLPRU_VERSION) -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_CRAY_VERSION) -# undef HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) -# if defined(_RELEASE_PATCHLEVEL) -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) -# else -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) -# endif -#endif - -#if defined(HEDLEY_CRAY_VERSION_CHECK) -# undef HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(HEDLEY_CRAY_VERSION) -# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IAR_VERSION) -# undef HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) -# if __VER__ > 1000 -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) -# else -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) -# endif -#endif - -#if defined(HEDLEY_IAR_VERSION_CHECK) -# undef HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(HEDLEY_IAR_VERSION) -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TINYC_VERSION) -# undef HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) -# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) -#endif - -#if defined(HEDLEY_TINYC_VERSION_CHECK) -# undef HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_DMC_VERSION) -# undef HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) -# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if defined(HEDLEY_DMC_VERSION_CHECK) -# undef HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(HEDLEY_DMC_VERSION) -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION) -# undef HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) -# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION_CHECK) -# undef HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(HEDLEY_COMPCERT_VERSION) -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PELLES_VERSION) -# undef HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) -# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if defined(HEDLEY_PELLES_VERSION_CHECK) -# undef HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(HEDLEY_PELLES_VERSION) -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION) -# undef HEDLEY_MCST_LCC_VERSION -#endif -#if defined(__LCC__) && defined(__LCC_MINOR__) -# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) -# undef HEDLEY_MCST_LCC_VERSION_CHECK -#endif -#if defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_GCC_VERSION) -# undef HEDLEY_GCC_VERSION 
-#endif -#if \ - defined(HEDLEY_GNUC_VERSION) && \ - !defined(__clang__) && \ - !defined(HEDLEY_INTEL_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_ARM_VERSION) && \ - !defined(HEDLEY_CRAY_VERSION) && \ - !defined(HEDLEY_TI_VERSION) && \ - !defined(HEDLEY_TI_ARMCL_VERSION) && \ - !defined(HEDLEY_TI_CL430_VERSION) && \ - !defined(HEDLEY_TI_CL2000_VERSION) && \ - !defined(HEDLEY_TI_CL6X_VERSION) && \ - !defined(HEDLEY_TI_CL7X_VERSION) && \ - !defined(HEDLEY_TI_CLPRU_VERSION) && \ - !defined(__COMPCERT__) && \ - !defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION -#endif - -#if defined(HEDLEY_GCC_VERSION_CHECK) -# undef HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(HEDLEY_GCC_VERSION) -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_HAS_ATTRIBUTE) -# undef HEDLEY_HAS_ATTRIBUTE -#endif -#if \ - defined(__has_attribute) && \ - ( \ - (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ - ) -# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) -#else -# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if \ - defined(__has_cpp_attribute) && \ - defined(__cplusplus) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) -# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#elif \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) -#else -# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else -# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_BUILTIN) -# undef HEDLEY_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) -#else -# define HEDLEY_HAS_BUILTIN(builtin) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_BUILTIN) -# undef HEDLEY_GNUC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_BUILTIN) -# undef HEDLEY_GCC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else -# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_FEATURE) -# undef HEDLEY_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) -#else -# define HEDLEY_HAS_FEATURE(feature) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_FEATURE) -# undef HEDLEY_GNUC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_FEATURE) -# undef HEDLEY_GCC_HAS_FEATURE -#endif -#if defined(__has_feature) -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else -# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_EXTENSION) -# undef HEDLEY_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) -#else -# define HEDLEY_HAS_EXTENSION(extension) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_EXTENSION) -# undef HEDLEY_GNUC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_EXTENSION) -# undef HEDLEY_GCC_HAS_EXTENSION -#endif -#if defined(__has_extension) -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else -# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) -#else -# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else -# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_HAS_WARNING) -# undef HEDLEY_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) -#else -# define HEDLEY_HAS_WARNING(warning) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_WARNING) -# undef HEDLEY_GNUC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_GCC_HAS_WARNING) -# undef HEDLEY_GCC_HAS_WARNING -#endif -#if defined(__has_warning) -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else -# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - defined(__clang__) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ - HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ - (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) -# define HEDLEY_PRAGMA(value) _Pragma(#value) -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_PRAGMA(value) __pragma(value) -#else -# define HEDLEY_PRAGMA(value) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_PUSH) -# undef HEDLEY_DIAGNOSTIC_PUSH -#endif -#if defined(HEDLEY_DIAGNOSTIC_POP) -# undef HEDLEY_DIAGNOSTIC_POP -#endif -#if defined(__clang__) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) -# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) -#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#else -# 
define HEDLEY_DIAGNOSTIC_PUSH -# define HEDLEY_DIAGNOSTIC_POP -#endif - -/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wc++98-compat") -# if HEDLEY_HAS_WARNING("-Wc++17-extensions") -# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# else -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - xpr \ - HEDLEY_DIAGNOSTIC_POP -# endif -# endif -#endif -#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x -#endif - -#if defined(HEDLEY_CONST_CAST) -# undef HEDLEY_CONST_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) -#elif \ - HEDLEY_HAS_WARNING("-Wcast-qual") || \ - HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_REINTERPRET_CAST) -# undef HEDLEY_REINTERPRET_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) -#else -# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_STATIC_CAST) -# undef HEDLEY_STATIC_CAST -#endif -#if defined(__cplusplus) -# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) -#else -# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(HEDLEY_CPP_CAST) -# undef HEDLEY_CPP_CAST -#endif -#if defined(__cplusplus) -# if HEDLEY_HAS_WARNING("-Wold-style-cast") -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ - ((T) (expr)) \ - HEDLEY_DIAGNOSTIC_POP -# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) -# define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("diag_suppress=Pe137") \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) -# endif -#else -# define HEDLEY_CPP_CAST(T, expr) (expr) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) -# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif -#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") -#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-attributes") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) -#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") -#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") -#elif \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) -# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif -#if HEDLEY_HAS_WARNING("-Wcast-qual") -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) -# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif -#if HEDLEY_HAS_WARNING("-Wunused-function") -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) -#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") -#else -# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif - -#if defined(HEDLEY_DEPRECATED) -# undef HEDLEY_DEPRECATED -#endif -#if defined(HEDLEY_DEPRECATED_FOR) -# undef HEDLEY_DEPRECATED_FOR -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) -# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) -#elif \ - (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) -#elif defined(__cplusplus) && (__cplusplus >= 201402L) -# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) -# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(deprecated) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_DEPRECATED(since) __declspec(deprecated) -# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") -# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") -#else -# define HEDLEY_DEPRECATED(since) -# define HEDLEY_DEPRECATED_FOR(since, replacement) -#endif - -#if defined(HEDLEY_UNAVAILABLE) -# undef HEDLEY_UNAVAILABLE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warning) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) -#else -# define HEDLEY_UNAVAILABLE(available_since) -#endif - -#if defined(HEDLEY_WARN_UNUSED_RESULT) -# undef HEDLEY_WARN_UNUSED_RESULT -#endif -#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) -# undef HEDLEY_WARN_UNUSED_RESULT_MSG -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) -#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) -# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#elif defined(_Check_return_) /* SAL */ -# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ -#else -# define HEDLEY_WARN_UNUSED_RESULT -# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) -#endif - -#if defined(HEDLEY_SENTINEL) -# undef HEDLEY_SENTINEL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(sentinel) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) -#else -# define HEDLEY_SENTINEL(position) -#endif - -#if defined(HEDLEY_NO_RETURN) -# undef HEDLEY_NO_RETURN -#endif -#if HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NO_RETURN __noreturn -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -# define HEDLEY_NO_RETURN _Noreturn -#elif defined(__cplusplus) && (__cplusplus >= 201103L) -# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) -#elif \ - HEDLEY_HAS_ATTRIBUTE(noreturn) || \ - HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_NO_RETURN _Pragma("does_not_return") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NO_RETURN __attribute((noreturn)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NO_RETURN __declspec(noreturn) -#else -# define HEDLEY_NO_RETURN -#endif - -#if defined(HEDLEY_NO_ESCAPE) -# undef HEDLEY_NO_ESCAPE -#endif -#if HEDLEY_HAS_ATTRIBUTE(noescape) -# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) -#else -# define HEDLEY_NO_ESCAPE -#endif - -#if defined(HEDLEY_UNREACHABLE) -# undef HEDLEY_UNREACHABLE -#endif -#if defined(HEDLEY_UNREACHABLE_RETURN) -# undef HEDLEY_UNREACHABLE_RETURN -#endif -#if defined(HEDLEY_ASSUME) -# undef HEDLEY_ASSUME -#endif -#if \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ASSUME(expr) __assume(expr) -#elif HEDLEY_HAS_BUILTIN(__builtin_assume) -# define HEDLEY_ASSUME(expr) __builtin_assume(expr) -#elif \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# if defined(__cplusplus) -# define HEDLEY_ASSUME(expr) std::_nassert(expr) -# else -# define HEDLEY_ASSUME(expr) _nassert(expr) -# endif -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ - HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_UNREACHABLE() __builtin_unreachable() -#elif defined(HEDLEY_ASSUME) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif -#if !defined(HEDLEY_ASSUME) -# if defined(HEDLEY_UNREACHABLE) -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) -# else -# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) -# endif -#endif -#if defined(HEDLEY_UNREACHABLE) -# if \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) -# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) -# else -# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() -# endif -#else -# define HEDLEY_UNREACHABLE_RETURN(value) return (value) -#endif -#if !defined(HEDLEY_UNREACHABLE) -# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wpedantic") -# pragma clang diagnostic ignored "-Wpedantic" -#endif -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif -#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) -# if defined(__clang__) -# pragma clang diagnostic ignored "-Wvariadic-macros" -# elif defined(HEDLEY_GCC_VERSION) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif -#endif -#if defined(HEDLEY_NON_NULL) -# undef HEDLEY_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) -#else -# define HEDLEY_NON_NULL(...) 
-#endif -HEDLEY_DIAGNOSTIC_POP - -#if defined(HEDLEY_PRINTF_FORMAT) -# undef HEDLEY_PRINTF_FORMAT -#endif -#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) -#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) -#elif \ - HEDLEY_HAS_ATTRIBUTE(format) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) -#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) -#else -# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) -#endif - -#if defined(HEDLEY_CONSTEXPR) -# undef HEDLEY_CONSTEXPR -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) -# endif -#endif -#if !defined(HEDLEY_CONSTEXPR) -# define HEDLEY_CONSTEXPR -#endif - -#if defined(HEDLEY_PREDICT) -# undef HEDLEY_PREDICT -#endif -#if defined(HEDLEY_LIKELY) -# undef HEDLEY_LIKELY -#endif -#if defined(HEDLEY_UNLIKELY) -# undef HEDLEY_UNLIKELY -#endif -#if defined(HEDLEY_UNPREDICTABLE) -# undef HEDLEY_UNPREDICTABLE -#endif -#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) -# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) -#endif -#if \ - (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) -# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) -# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) -# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) -# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) -#elif \ - (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PREDICT(expr, expected, probability) \ - (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) -# define HEDLEY_PREDICT_TRUE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ - })) -# define HEDLEY_PREDICT_FALSE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ - })) -# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) -# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) -#else -# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) -# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) -# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) -# define HEDLEY_LIKELY(expr) (!!(expr)) -# define HEDLEY_UNLIKELY(expr) (!!(expr)) -#endif -#if !defined(HEDLEY_UNPREDICTABLE) -# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) -#endif - -#if defined(HEDLEY_MALLOC) -# undef HEDLEY_MALLOC -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(malloc) || \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_MALLOC __attribute__((__malloc__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_MALLOC _Pragma("returns_new_memory") -#elif \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_MALLOC __declspec(restrict) -#else -# define HEDLEY_MALLOC -#endif - -#if defined(HEDLEY_PURE) -# undef HEDLEY_PURE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(pure) || \ - HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PURE __attribute__((__pure__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_PURE _Pragma("does_not_write_global_data") -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ - ) -# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") -#else -# define HEDLEY_PURE -#endif - -#if defined(HEDLEY_CONST) -# undef HEDLEY_CONST -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(const) || \ - HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_CONST __attribute__((__const__)) -#elif \ - HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define HEDLEY_CONST _Pragma("no_side_effect") -#else -# define HEDLEY_CONST HEDLEY_PURE -#endif - -#if defined(HEDLEY_RESTRICT) -# undef HEDLEY_RESTRICT -#endif -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) -# define HEDLEY_RESTRICT restrict -#elif \ - HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ - HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - defined(__clang__) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RESTRICT __restrict -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) -# define HEDLEY_RESTRICT _Restrict -#else -# define HEDLEY_RESTRICT -#endif - -#if defined(HEDLEY_INLINE) -# undef HEDLEY_INLINE -#endif -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - (defined(__cplusplus) && (__cplusplus >= 199711L)) -# define HEDLEY_INLINE inline -#elif \ - defined(HEDLEY_GCC_VERSION) || \ - HEDLEY_ARM_VERSION_CHECK(6,2,0) -# define HEDLEY_INLINE __inline__ -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_INLINE __inline -#else -# define HEDLEY_INLINE -#endif - -#if defined(HEDLEY_ALWAYS_INLINE) -# undef HEDLEY_ALWAYS_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(always_inline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE -#elif \ - HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_ALWAYS_INLINE __forceinline -#elif defined(__cplusplus) && \ - ( \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ - ) -# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") -#else -# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE -#endif - -#if defined(HEDLEY_NEVER_INLINE) -# undef HEDLEY_NEVER_INLINE -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(noinline) || \ - HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) -# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NEVER_INLINE _Pragma("inline=never") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NEVER_INLINE __attribute((noinline)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#else -# define HEDLEY_NEVER_INLINE -#endif - -#if defined(HEDLEY_PRIVATE) -# undef HEDLEY_PRIVATE -#endif -#if defined(HEDLEY_PUBLIC) -# undef HEDLEY_PUBLIC -#endif -#if defined(HEDLEY_IMPORT) -# undef HEDLEY_IMPORT -#endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC __declspec(dllexport) -# define HEDLEY_IMPORT __declspec(dllimport) -#else -# if \ - HEDLEY_HAS_ATTRIBUTE(visibility) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - ( \ - defined(__TI_EABI__) && \ - ( \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ - ) \ - ) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) -# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) -# else -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC -# endif -# define HEDLEY_IMPORT extern -#endif - -#if defined(HEDLEY_NO_THROW) -# undef HEDLEY_NO_THROW -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nothrow) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_THROW __attribute__((__nothrow__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NO_THROW __declspec(nothrow) -#else -# define HEDLEY_NO_THROW -#endif - -#if defined(HEDLEY_FALL_THROUGH) -# undef HEDLEY_FALL_THROUGH -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_FALL_THROUGH -#elif \ - HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) -#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) -#elif defined(__fallthrough) /* SAL */ -# define HEDLEY_FALL_THROUGH __fallthrough -#else -# define HEDLEY_FALL_THROUGH -#endif - -#if defined(HEDLEY_RETURNS_NON_NULL) -# undef HEDLEY_RETURNS_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) -#elif defined(_Ret_notnull_) /* SAL */ -# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ -#else -# define HEDLEY_RETURNS_NON_NULL -#endif - -#if defined(HEDLEY_ARRAY_PARAM) -# undef HEDLEY_ARRAY_PARAM -#endif -#if \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - !defined(__STDC_NO_VLA__) && \ - !defined(__cplusplus) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_ARRAY_PARAM(name) (name) -#else -# define 
HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(HEDLEY_IS_CONSTANT) -# undef HEDLEY_IS_CONSTANT -#endif -#if defined(HEDLEY_REQUIRE_CONSTEXPR) -# undef HEDLEY_REQUIRE_CONSTEXPR -#endif -/* HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_IS_CONSTEXPR_) -# undef HEDLEY_IS_CONSTEXPR_ -#endif -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -# if \ - HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) -# endif -# elif \ - ( \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(HEDLEY_SUNPRO_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION)) || \ - (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) -# endif -# elif \ - defined(HEDLEY_GCC_VERSION) || \ - defined(HEDLEY_INTEL_VERSION) || \ - defined(HEDLEY_TINYC_VERSION) || \ - defined(HEDLEY_TI_ARMCL_VERSION) || \ - HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ - defined(HEDLEY_TI_CL2000_VERSION) || \ - defined(HEDLEY_TI_CL6X_VERSION) || \ - defined(HEDLEY_TI_CL7X_VERSION) || \ - defined(HEDLEY_TI_CLPRU_VERSION) || \ - defined(__clang__) -# define HEDLEY_IS_CONSTEXPR_(expr) ( \ - sizeof(void) != \ - sizeof(*( \ - 1 ? \ - ((void*) ((expr) * 0L) ) : \ - ((struct { char v[sizeof(void) * 2]; } *) 1) \ - ) \ - ) \ - ) -# endif -#endif -#if defined(HEDLEY_IS_CONSTEXPR_) -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) -#else -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) (0) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(HEDLEY_BEGIN_C_DECLS) -# undef HEDLEY_BEGIN_C_DECLS -#endif -#if defined(HEDLEY_END_C_DECLS) -# undef HEDLEY_END_C_DECLS -#endif -#if defined(HEDLEY_C_DECL) -# undef HEDLEY_C_DECL -#endif -#if defined(__cplusplus) -# define HEDLEY_BEGIN_C_DECLS extern "C" { -# define HEDLEY_END_C_DECLS } -# define HEDLEY_C_DECL extern "C" -#else -# define HEDLEY_BEGIN_C_DECLS -# define HEDLEY_END_C_DECLS -# define HEDLEY_C_DECL -#endif - -#if defined(HEDLEY_STATIC_ASSERT) -# undef HEDLEY_STATIC_ASSERT -#endif -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) -# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#else -# define HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(HEDLEY_NULL) -# undef HEDLEY_NULL -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) -# elif defined(NULL) -# define HEDLEY_NULL NULL -# else -# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) -# endif -#elif defined(NULL) -# define HEDLEY_NULL NULL -#else -# define HEDLEY_NULL ((void*) 0) -#endif - -#if defined(HEDLEY_MESSAGE) -# undef HEDLEY_MESSAGE -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_MESSAGE(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(message msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_WARNING) -# undef HEDLEY_WARNING -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_WARNING(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(clang warning msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_REQUIRE) -# undef HEDLEY_REQUIRE -#endif -#if defined(HEDLEY_REQUIRE_MSG) -# undef HEDLEY_REQUIRE_MSG -#endif -#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) -# if HEDLEY_HAS_WARNING("-Wgcc-compat") -# define HEDLEY_REQUIRE(expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# define HEDLEY_REQUIRE_MSG(expr,msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) -# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) -# endif -#else -# define HEDLEY_REQUIRE(expr) -# define HEDLEY_REQUIRE_MSG(expr,msg) -#endif - -#if defined(HEDLEY_FLAGS) -# undef HEDLEY_FLAGS -#endif -#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) -# define HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else -# define HEDLEY_FLAGS -#endif - -#if defined(HEDLEY_FLAGS_CAST) -# undef HEDLEY_FLAGS_CAST -#endif -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) -# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)") \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(HEDLEY_EMPTY_BASES) -# undef HEDLEY_EMPTY_BASES -#endif -#if \ - (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else -# define HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. */ - -#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) -# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) -#else -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_BUILTIN) -# undef HEDLEY_CLANG_HAS_BUILTIN -#endif -#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) - -#if defined(HEDLEY_CLANG_HAS_FEATURE) -# undef HEDLEY_CLANG_HAS_FEATURE -#endif -#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) - -#if defined(HEDLEY_CLANG_HAS_EXTENSION) -# undef HEDLEY_CLANG_HAS_EXTENSION -#endif -#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) - -#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_WARNING) -# undef HEDLEY_CLANG_HAS_WARNING -#endif -#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ -/* :: End simde/hedley.h :: */ - -#define SIMDE_VERSION_MAJOR 0 -#define SIMDE_VERSION_MINOR 8 -#define SIMDE_VERSION_MICRO 0 -#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) -// Also update meson.build in the root directory of the repository - -#include -#include - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin 
simde/simde-detect-clang.h :: */ -/* Detect Clang Version - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -/* This file was originally part of SIMDe - * (). You're free to do with it as - * you please, but I do have a few small requests: - * - * * If you make improvements, please submit them back to SIMDe - * (at ) so others can - * benefit from them. - * * Please keep a link to SIMDe intact so people know where to submit - * improvements. - * * If you expose it publicly, please change the SIMDE_ prefix to - * something specific to your project. - * - * The version numbers clang exposes (in the ___clang_major__, - * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. - * Vendors such as Apple will define these values to their version - * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but - * __clang_major__ and __clang_minor__ are defined to 4 and 0 - * respectively, instead of 3 and 1. - * - * The solution is *usually* to use clang's feature detection macros - * () - * to determine if the feature you're interested in is available. This - * generally works well, and it should probably be the first thing you - * try. Unfortunately, it's not possible to check for everything. In - * particular, compiler bugs. - * - * This file just uses the feature checking macros to detect features - * added in specific versions of clang to identify which version of - * clang the compiler is based on. - * - * Right now it only goes back to 3.6, but I'm happy to accept patches - * to go back further. And, of course, newer versions are welcome if - * they're not already present, and if you find a way to detect a point - * release that would be great, too! - */ - -#if !defined(SIMDE_DETECT_CLANG_H) -#define SIMDE_DETECT_CLANG_H 1 - -/* Attempt to detect the upstream clang version number. I usually only - * worry about major version numbers (at least for 4.0+), but if you - * need more resolution I'm happy to accept patches that are able to - * detect minor versions as well. That said, you'll probably have a - * hard time with detection since AFAIK most minor releases don't add - * anything we can detect. Updated based on - * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 - * - would welcome patches/updates there as well. 
- */ - -#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -# if __has_attribute(unsafe_buffer_usage) // no new warnings in 17.0 -# define SIMDE_DETECT_CLANG_VERSION 170000 -# elif __has_attribute(nouwtable) // no new warnings in 16.0 -# define SIMDE_DETECT_CLANG_VERSION 160000 -# elif __has_warning("-Warray-parameter") -# define SIMDE_DETECT_CLANG_VERSION 150000 -# elif __has_warning("-Wbitwise-instead-of-logical") -# define SIMDE_DETECT_CLANG_VERSION 140000 -# elif __has_warning("-Waix-compat") -# define SIMDE_DETECT_CLANG_VERSION 130000 -# elif __has_warning("-Wformat-insufficient-args") -# define SIMDE_DETECT_CLANG_VERSION 120000 -# elif __has_warning("-Wimplicit-const-int-float-conversion") -# define SIMDE_DETECT_CLANG_VERSION 110000 -# elif __has_warning("-Wmisleading-indentation") -# define SIMDE_DETECT_CLANG_VERSION 100000 -# elif defined(__FILE_NAME__) -# define SIMDE_DETECT_CLANG_VERSION 90000 -# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) -# define SIMDE_DETECT_CLANG_VERSION 80000 -// For reasons unknown, Xcode 10.3 (Apple LLVM version 10.0.1) is apparently -// based on Clang 7, but does not support the warning we test. -// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and -// https://trac.macports.org/wiki/XcodeVersionInfo. -# elif __has_warning("-Wc++98-compat-extra-semi") || \ - (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) -# define SIMDE_DETECT_CLANG_VERSION 70000 -# elif __has_warning("-Wpragma-pack") -# define SIMDE_DETECT_CLANG_VERSION 60000 -# elif __has_warning("-Wbitfield-enum-conversion") -# define SIMDE_DETECT_CLANG_VERSION 50000 -# elif __has_attribute(diagnose_if) -# define SIMDE_DETECT_CLANG_VERSION 40000 -# elif __has_warning("-Wcomma") -# define SIMDE_DETECT_CLANG_VERSION 39000 -# elif __has_warning("-Wdouble-promotion") -# define SIMDE_DETECT_CLANG_VERSION 38000 -# elif __has_warning("-Wshift-negative-value") -# define SIMDE_DETECT_CLANG_VERSION 37000 -# elif __has_warning("-Wambiguous-ellipsis") -# define SIMDE_DETECT_CLANG_VERSION 36000 -# else -# define SIMDE_DETECT_CLANG_VERSION 1 -# endif -#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ - -/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty - * straightforward; it returns true if the compiler is a derivative - * of clang >= the specified version. - * - * Since this file is often (primarily?) useful for working around bugs - * it is also helpful to have a macro which returns true if only if the - * compiler is a version of clang *older* than the specified version to - * make it a bit easier to ifdef regions to add code for older versions, - * such as pragmas to disable a specific warning. 
*/ - -#if defined(SIMDE_DETECT_CLANG_VERSION) -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) -#else -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) -#endif - -#endif /* !defined(SIMDE_DETECT_CLANG_H) */ -/* :: End simde/simde-detect-clang.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-arch.h :: */ -/* Architecture detection - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - * Different compilers define different preprocessor macros for the - * same architecture. This is an attempt to provide a single - * interface which is usable on any compiler. - * - * In general, a macro named SIMDE_ARCH_* is defined for each - * architecture the CPU supports. When there are multiple possible - * versions, we try to define the macro to the target version. For - * example, if you want to check for i586+, you could do something - * like: - * - * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) - * ... - * #endif - * - * You could also just check that SIMDE_ARCH_X86 >= 5 without checking - * if it's defined first, but some compilers may emit a warning about - * an undefined macro being used (e.g., GCC with -Wundef). - * - * This was originally created for SIMDe - * (hence the prefix), but this - * header has no dependencies and may be used anywhere. It is - * originally based on information from - * , though it - * has been enhanced with additional information. - * - * If you improve this file, or find a bug, please file the issue at - * . If you copy this into - * your project, even if you change the prefix, please keep the links - * to SIMDe intact so others know where to report issues, submit - * enhancements, and find the latest version. 
*/ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -# if defined(__alpha_ev6__) -# define SIMDE_ARCH_ALPHA 6 -# elif defined(__alpha_ev5__) -# define SIMDE_ARCH_ALPHA 5 -# elif defined(__alpha_ev4__) -# define SIMDE_ARCH_ALPHA 4 -# else -# define SIMDE_ARCH_ALPHA 1 -# endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -# define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -# define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -# if !defined(_M_ARM64EC) -# define SIMDE_ARCH_AMD64 1000 -# endif -#endif - -/* ARM - */ -#if defined(__ARM_ARCH) -# if __ARM_ARCH > 100 -# define SIMDE_ARCH_ARM (__ARM_ARCH) -# else -# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) -# endif -#elif defined(_M_ARM) -# if _M_ARM > 100 -# define SIMDE_ARCH_ARM (_M_ARM) -# else -# define SIMDE_ARCH_ARM (_M_ARM * 100) -# endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_ARM 800 -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -# define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) -#else -# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -# define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -# elif defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -# endif -#endif -#if defined(__ARM_FEATURE_SVE) -# define SIMDE_ARCH_ARM_SVE -#endif -#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA -# define SIMDE_ARCH_ARM_FMA -#endif -#if defined(__ARM_FEATURE_CRYPTO) -# define SIMDE_ARCH_ARM_CRYPTO -#endif -#if defined(__ARM_FEATURE_QRDMX) -# define SIMDE_ARCH_ARM_QRDMX -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -# define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -# define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) -# define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -# define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -# define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -# define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -# define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -# define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -# define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -# define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -# define 
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
*/ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. 
*/ -#if \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ -#endif - -/* This warning needs a lot of work. It is triggered if all you do is - * pass the value to memcpy/__builtin_memcpy, or if you initialize a - * member of the union, even if that member takes up the entire union. - * Last tested with clang-10, hopefully things will improve in the - * future; if clang fixes this I'd love to enable it. */ -#if \ - HEDLEY_HAS_WARNING("-Wconditional-uninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ -#endif - -/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which - * will is false. However, SIMDe uses these operations exclusively - * for things like _mm_cmpeq_ps, for which we really do want to check - * for equality (or inequality). - * - * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro - * which just wraps a check in some code do disable this diagnostic I'd - * be happy to accept it. */ -#if \ - HEDLEY_HAS_WARNING("-Wfloat-equal") || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ -#endif - -/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. - * If Hedley can't find an implementation it will preprocess to - * nothing, which means there will be a trailing semi-colon. */ -#if HEDLEY_HAS_WARNING("-Wextra-semi") - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") -#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ -#endif - -/* We do use a few variadic macros, which technically aren't available - * until C99 and C++11, but every compiler I'm aware of has supported - * them for much longer. That said, usage is isolated to the test - * suite and compilers known to support them. */ -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) - #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#endif - -/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro - * before we can access certain SIMD intrinsics, but this diagnostic - * warns about it being a reserved name. It is a reserved name, but - * it's reserved for the compiler and we are using it to convey - * information to the compiler. - * - * This is also used when enabling native aliases since we don't get to - * choose the macro names. 
*/ -#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#endif - -/* Similar to above; types like simde__m128i are reserved due to the - * double underscore, but we didn't choose them, Intel did. */ -#if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ -#endif - -/* clang 3.8 warns about the packed attribute being unnecessary when - * used in the _mm_loadu_* functions. That *may* be true for version - * 3.8, but for later versions it is crucial in order to make unaligned - * access safe. */ -#if HEDLEY_HAS_WARNING("-Wpacked") - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ -#endif - -/* Triggered when assigning a float to a double implicitly. We use - * explicit casts in SIMDe, this is only used in the test suite. */ -#if HEDLEY_HAS_WARNING("-Wdouble-promotion") - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -/* Several compilers treat conformant array parameters as VLAs. We - * test to make sure we're in C mode (C++ doesn't support CAPs), and - * that the version of the standard supports CAPs. We also reject - * some buggy compilers like MSVC (the logic is in Hedley if you want - * to take a look), but with certain warnings enabled some compilers - * still like to emit a diagnostic. */ -#if HEDLEY_HAS_WARNING("-Wvla") - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ -#endif - -/* If you add an unused attribute to a function and don't use it, clang - * may emit this. 
*/ -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpass-failed") - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpadded") - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ -#endif - -#if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ -#endif - -#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ -#endif - -/* clang will emit this warning when we use C99 extensions whan not in - * C99 mode, even though it does support this. In such cases we check - * the compiler and version first, so we know it's not a problem. */ -#if HEDLEY_HAS_WARNING("-Wc99-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -#endif - -/* Similar problm as above; we rely on some basic C99 support, but clang - * has started warning obut this even in C17 mode with -Weverything. */ -#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ -#endif - -/* https://github.com/simd-everywhere/simde/issues/277 */ -#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ -#endif - -/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS - * to silence, but you have to do that before including anything and - * that would require reordering includes. */ -#if defined(_MSC_VER) - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ -#endif - -/* Some compilers, such as clang, may use `long long` for 64-bit - * integers, but `long long` triggers a diagnostic with - * -Wc++98-compat-pedantic which says 'long long' is incompatible with - * C++98. 
*/ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ - _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ -#endif - -/* Some problem as above */ -#if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ -#endif - -/* emscripten emits this whenever stdin/stdout/stderr is used in a - * macro. */ -#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ -#endif - -/* Clang uses C11 generic selections to implement some AltiVec - * functions, which triggers this diagnostic when not compiling - * in C11 mode */ -#if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ -#endif - -/* Clang sometimes triggers this warning in macros in the AltiVec and - * NEON headers, or due to missing functions. */ -#if HEDLEY_HAS_WARNING("-Wvector-conversion") - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") - /* For NEON, the situation with -Wvector-conversion in clang < 10 is - * bad enough that we just disable the warning altogether. On x86, - * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ - #if \ - (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ - SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ -#endif - -/* Prior to 5.0, clang didn't support disabling diagnostics in - * statement exprs. As a result, some macros we use don't - * properly silence warnings. 
*/ -#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ -#endif - -/* SLEEF triggers this a *lot* in their headers */ -#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#endif - -/* GCC emits this under some circumstances when using __int128 */ -#if HEDLEY_GCC_VERSION_CHECK(4,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -#endif - -/* MSVC doesn't like (__assume(0), code) and will warn about code being - * unreachable, but we want it there because not all compilers - * understand the unreachable macro and will complain if it is missing. - * I'm planning on adding a new macro to Hedley to handle this a bit - * more elegantly, but until then... */ -#if defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) -#elif defined(__clang__) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ -#endif - -/* This is a false positive from GCC in a few places. */ -#if HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif - -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#else - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ -#endif - -/* Some native functions on E2K with instruction set < v6 are declared - * as deprecated due to inefficiency. Still they are more efficient - * than SIMDe implementation. So we're using them, and switching off - * these deprecation warnings. 
- static const double a3 = 1.421413741; - static const double a4 = -1.453152027; - static const double a5 = 1.061405429; - static const double p = 0.3275911; - - const int sign = x < 0; - x = simde_math_fabs(x) / simde_math_sqrt(2.0); - - /* A&S formula 7.1.26 */ - double t = 1.0 / (1.0 + p * x); - double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); - - return 0.5 * (1.0 + (sign ? -y : y)); - } - #define simde_math_cdfnorm simde_math_cdfnorm -#endif - -#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) - static HEDLEY_INLINE - float - simde_math_cdfnormf(float x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const float a1 = 0.254829592f; - static const float a2 = -0.284496736f; - static const float a3 = 1.421413741f; - static const float a4 = -1.453152027f; - static const float a5 = 1.061405429f; - static const float p = 0.3275911f; - - const int sign = x < 0; - x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); - - /* A&S formula 7.1.26 */ - float t = 1.0f / (1.0f + p * x); - float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); - - return 0.5f * (1.0f + (sign ? -y : y)); - } - #define simde_math_cdfnormf simde_math_cdfnormf -#endif - -#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) - /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ - static HEDLEY_INLINE - double - simde_math_cdfnorminv(double p) { - static const double a[6] = { - -3.969683028665376e+01, - 2.209460984245205e+02, - -2.759285104469687e+02, - 1.383577518672690e+02, - -3.066479806614716e+01, - 2.506628277459239e+00 - }; - - static const double b[5] = { - -5.447609879822406e+01, - 1.615858368580409e+02, - -1.556989798598866e+02, - 6.680131188771972e+01, - -1.328068155288572e+01 - }; - - static const double c[6] = { - -7.784894002430293e-03, - -3.223964580411365e-01, - -2.400758277161838e+00, - -2.549732539343734e+00, - 4.374664141464968e+00, - 2.938163982698783e+00 - }; - - static const double d[4] = { - 7.784695709041462e-03, - 3.224671290700398e-01, - 2.445134137142996e+00, - 3.754408661907416e+00 - }; - - static const double low = 0.02425; - static const double high = 0.97575; - double q, r; - - if (p < 0 || p > 1) { - return 0.0; - } else if (p == 0) { - return -SIMDE_MATH_INFINITY; - } else if (p == 1) { - return SIMDE_MATH_INFINITY; - } else if (p < low) { - q = simde_math_sqrt(-2.0 * simde_math_log(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } -} -#define simde_math_cdfnorminv simde_math_cdfnorminv -#endif - -#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_cdfnorminvf(float p) { - static const float a[6] = { - -3.969683028665376e+01f, - 2.209460984245205e+02f, - -2.759285104469687e+02f, - 1.383577518672690e+02f, - -3.066479806614716e+01f, - 
2.506628277459239e+00f - }; - static const float b[5] = { - -5.447609879822406e+01f, - 1.615858368580409e+02f, - -1.556989798598866e+02f, - 6.680131188771972e+01f, - -1.328068155288572e+01f - }; - static const float c[6] = { - -7.784894002430293e-03f, - -3.223964580411365e-01f, - -2.400758277161838e+00f, - -2.549732539343734e+00f, - 4.374664141464968e+00f, - 2.938163982698783e+00f - }; - static const float d[4] = { - 7.784695709041462e-03f, - 3.224671290700398e-01f, - 2.445134137142996e+00f, - 3.754408661907416e+00f - }; - static const float low = 0.02425f; - static const float high = 0.97575f; - float q, r; - - if (p < 0 || p > 1) { - return 0.0f; - } else if (p == 0) { - return -SIMDE_MATH_INFINITYF; - } else if (p == 1) { - return SIMDE_MATH_INFINITYF; - } else if (p < low) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5f; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } - } - #define simde_math_cdfnorminvf simde_math_cdfnorminvf -#endif - -#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfinv(double x) { - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c - * - * The original answer on SO uses a constant of 0.147, but in my - * testing 0.14829094707965850830078125 gives a lower average absolute error - * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). - * That said, if your goal is to minimize the *maximum* absolute - * error, 0.15449436008930206298828125 provides significantly better - * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); - } - #define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfinvf(float x) { - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); - } - #define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfcinv(double x) { - if(x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - static const double p[6] = { - 0.1550470003116, - 1.382719649631, - 0.690969348887, - -1.128081391617, - 0.680544246825, - -0.16444156791 - }; - static const double q[3] = { - 0.155024849822, - 1.385228141995, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - static const double p[4] = { - 0.00980456202915, - 0.363667889171, - 0.97302949837, - -0.5374947401 - }; - static const double q[3] = { - 0.00980451277802, - 0.363699971544, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } - } - - #define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfcinvf(float x) { - if(x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = { - 0.1550470003116f, - 1.382719649631f, - 0.690969348887f, - -1.128081391617f, - 0.680544246825f - -0.164441567910f - }; - static const float q[3] = { - 0.155024849822f, - 1.385228141995f, - 1.000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = { - 0.00980456202915f, - 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f - }; - static const float q[3] = { - 0.00980451277802f, - 0.36369997154400f, - 1.00000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; - } - } - - #define simde_math_erfcinvf simde_math_erfcinvf -#endif - -static HEDLEY_INLINE -double -simde_math_rad2deg(double radians) { - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE -float -simde_math_rad2degf(float radians) { - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE -double -simde_math_deg2rad(double degrees) { - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE -float -simde_math_deg2radf(float degrees) { - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE -int8_t -simde_math_adds_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); - #else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_adds_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_adds_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_adds_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_adds_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); - #else - uint8_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_adds_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); - #else - uint16_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_adds_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); - #else - uint32_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_adds_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); - #else - uint64_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -int8_t -simde_math_subs_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); - #else - uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - case 32: result = func_name(__VA_ARGS__, 32); break; \ - case 33: result = func_name(__VA_ARGS__, 33); break; \ - case 34: result = func_name(__VA_ARGS__, 34); break; \ - case 35: result = func_name(__VA_ARGS__, 35); break; \ - case 36: result = func_name(__VA_ARGS__, 36); break; \ - case 37: result = func_name(__VA_ARGS__, 37); break; \ - case 38: result = func_name(__VA_ARGS__, 38); break; \ - case 39: result = func_name(__VA_ARGS__, 39); break; \ - case 40: result = func_name(__VA_ARGS__, 40); break; \ - case 41: result = func_name(__VA_ARGS__, 41); break; \ - case 42: result = func_name(__VA_ARGS__, 42); break; \ - case 43: result = func_name(__VA_ARGS__, 43); break; \ - case 44: result = func_name(__VA_ARGS__, 44); break; \ - case 45: result = func_name(__VA_ARGS__, 45); break; \ - case 46: result = func_name(__VA_ARGS__, 46); break; \ - case 47: result = func_name(__VA_ARGS__, 47); break; \ - case 48: result = func_name(__VA_ARGS__, 48); break; \ - case 49: result = func_name(__VA_ARGS__, 49); break; \ - case 50: result = func_name(__VA_ARGS__, 50); break; \ - case 51: result = func_name(__VA_ARGS__, 51); break; \ - case 52: result = func_name(__VA_ARGS__, 52); break; \ - case 53: result = func_name(__VA_ARGS__, 53); break; \ - case 54: result = func_name(__VA_ARGS__, 54); break; \ - case 55: result = func_name(__VA_ARGS__, 55); break; \ - case 56: result = func_name(__VA_ARGS__, 56); break; \ - case 57: result = func_name(__VA_ARGS__, 57); break; \ - case 58: result = func_name(__VA_ARGS__, 58); break; \ - case 59: result = func_name(__VA_ARGS__, 59); break; \ - case 60: result = func_name(__VA_ARGS__, 60); break; \ - case 61: result = func_name(__VA_ARGS__, 61); break; \ - case 62: 
result = func_name(__VA_ARGS__, 62); break; \ - case 63: result = func_name(__VA_ARGS__, 63); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - case 32: func_name(__VA_ARGS__, 32); break; \ - case 33: func_name(__VA_ARGS__, 33); break; \ - case 34: func_name(__VA_ARGS__, 34); break; \ - case 35: func_name(__VA_ARGS__, 35); break; \ - case 36: func_name(__VA_ARGS__, 36); break; \ - case 37: func_name(__VA_ARGS__, 37); break; \ - case 38: func_name(__VA_ARGS__, 38); break; \ 
- case 39: func_name(__VA_ARGS__, 39); break; \ - case 40: func_name(__VA_ARGS__, 40); break; \ - case 41: func_name(__VA_ARGS__, 41); break; \ - case 42: func_name(__VA_ARGS__, 42); break; \ - case 43: func_name(__VA_ARGS__, 43); break; \ - case 44: func_name(__VA_ARGS__, 44); break; \ - case 45: func_name(__VA_ARGS__, 45); break; \ - case 46: func_name(__VA_ARGS__, 46); break; \ - case 47: func_name(__VA_ARGS__, 47); break; \ - case 48: func_name(__VA_ARGS__, 48); break; \ - case 49: func_name(__VA_ARGS__, 49); break; \ - case 50: func_name(__VA_ARGS__, 50); break; \ - case 51: func_name(__VA_ARGS__, 51); break; \ - case 52: func_name(__VA_ARGS__, 52); break; \ - case 53: func_name(__VA_ARGS__, 53); break; \ - case 54: func_name(__VA_ARGS__, 54); break; \ - case 55: func_name(__VA_ARGS__, 55); break; \ - case 56: func_name(__VA_ARGS__, 56); break; \ - case 57: func_name(__VA_ARGS__, 57); break; \ - case 58: func_name(__VA_ARGS__, 58); break; \ - case 59: func_name(__VA_ARGS__, 59); break; \ - case 60: func_name(__VA_ARGS__, 60); break; \ - case 61: func_name(__VA_ARGS__, 61); break; \ - case 62: func_name(__VA_ARGS__, 62); break; \ - case 63: func_name(__VA_ARGS__, 63); break; \ - default: default_case; break; \ - } \ - } while (0) - -HEDLEY_DIAGNOSTIC_POP - -#endif -/* :: End simde/simde-constify.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-align.h :: */ -/* Alignment - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - ********************************************************************** - * - * This is portability layer which should help iron out some - * differences across various compilers, as well as various verisons of - * C and C++. - * - * It was originally developed for SIMD Everywhere - * (), but since its only - * dependency is Hedley (, also CC0) - * it can easily be used in other projects, so please feel free to do - * so. - * - * If you do use this in your project, please keep a link to SIMDe in - * your code to remind you where to report any bugs and/or check for - * updated versions. - * - * # API Overview - * - * The API has several parts, and most macros have a few variations. - * There are APIs for declaring aligned fields/variables, optimization - * hints, and run-time alignment checks. - * - * Briefly, macros ending with "_TO" take numeric values and are great - * when you know the value you would like to use. Macros ending with - * "_LIKE", on the other hand, accept a type and are used when you want - * to use the alignment of a type instead of hardcoding a value. - * - * Documentation for each section of the API is inline. - * - * True to form, MSVC is the main problem and imposes several - * limitations on the effectiveness of the APIs. Detailed descriptions - * of the limitations of each macro are inline, but in general: - * - * * On C11+ or C++11+ code written using this API will work. The - * ASSUME macros may or may not generate a hint to the compiler, but - * that is only an optimization issue and will not actually cause - * failures. - * * If you're using pretty much any compiler other than MSVC, - * everything should basically work as well as in C11/C++11. 
- */ - -#if !defined(SIMDE_ALIGN_H) -#define SIMDE_ALIGN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* I know this seems a little silly, but some non-hosted compilers - * don't have stddef.h, so we try to accomodate them. */ -#if !defined(SIMDE_ALIGN_SIZE_T_) - #if defined(__SIZE_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__SIZE_T_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #else - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #endif -#endif - -#if !defined(SIMDE_ALIGN_INTPTR_T_) - #if defined(__INTPTR_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ - #elif defined(__PTRDIFF_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ - #elif defined(__PTRDIFF_T_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #else - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #endif -#endif - -#if defined(SIMDE_ALIGN_DEBUG) - #if defined(__cplusplus) - #include - #else - #include - #endif -#endif - -/* SIMDE_ALIGN_OF(Type) - * - * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or - * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. - * It isn't defined everywhere (only when the compiler has some alignof- - * like feature we can use to implement it), but it should work in most - * modern compilers, as well as C11 and C++11. - * - * If we can't find an implementation for SIMDE_ALIGN_OF then the macro - * will not be defined, so if you can handle that situation sensibly - * you may need to sprinkle some ifdefs into your code. - */ -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (0 && HEDLEY_HAS_FEATURE(c_alignof)) - #define SIMDE_ALIGN_OF(Type) _Alignof(Type) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) - #define SIMDE_ALIGN_OF(Type) alignof(Type) -#elif \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - defined(__IBM__ALIGNOF__) || \ - defined(__clang__) - #define SIMDE_ALIGN_OF(Type) __alignof__(Type) -#elif \ - HEDLEY_IAR_VERSION_CHECK(8,40,0) - #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(19,0,0) - /* Probably goes back much further, but MS takes down their old docs. - * If you can verify that this works in earlier versions please let - * me know! */ - #define SIMDE_ALIGN_OF(Type) __alignof(Type) -#endif - -/* SIMDE_ALIGN_MAXIMUM: - * - * This is the maximum alignment that the compiler supports. You can - * define the value prior to including SIMDe if necessary, but in that - * case *please* submit an issue so we can add the platform to the - * detection code. - * - * Most compilers are okay with types which are aligned beyond what - * they think is the maximum, as long as the alignment is a power - * of two. 
Older versions of MSVC is the exception, so we need to cap - * the alignment requests at values that the implementation supports. - * - * XL C/C++ will accept values larger than 16 (which is the alignment - * of an AltiVec vector), but will not reliably align to the larger - * value, so so we cap the value at 16 there. - * - * If the compiler accepts any power-of-two value within reason then - * this macro should be left undefined, and the SIMDE_ALIGN_CAP - * macro will just return the value passed to it. */ -#if !defined(SIMDE_ALIGN_MAXIMUM) - #if defined(HEDLEY_MSVC_VERSION) - #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) - // Visual studio 2017 and newer does not need a max - #else - #if defined(_M_IX86) || defined(_M_AMD64) - #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 - #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) - /* VS 2010 is really a guess based on Wikipedia; if anyone can - * test with old VS versions I'd really appreciate it. */ - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 - #else - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif - #elif defined(_M_ARM) || defined(_M_ARM64) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 - #endif - #endif - #elif defined(HEDLEY_IBM_VERSION) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif -#endif - -/* You can mostly ignore these; they're intended for internal use. - * If you do need to use them please let me know; if they fulfill - * a common use case I'll probably drop the trailing underscore - * and make them part of the public API. */ -#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) - #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 - #define SIMDE_ALIGN_64_ 32 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 - #define SIMDE_ALIGN_64_ 16 - #define SIMDE_ALIGN_32_ 16 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 - #define SIMDE_ALIGN_64_ 8 - #define SIMDE_ALIGN_32_ 8 - #define SIMDE_ALIGN_16_ 8 - #define SIMDE_ALIGN_8_ 8 - #else - #error Max alignment expected to be >= 8 - #endif -#else - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 -#endif - -/** - * SIMDE_ALIGN_CAP(Alignment) - * - * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. - */ -#if defined(SIMDE_ALIGN_MAXIMUM) - #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) -#else - #define SIMDE_ALIGN_CAP(Alignment) (Alignment) -#endif - -/* SIMDE_ALIGN_TO(Alignment) - * - * SIMDE_ALIGN_TO is used to declare types or variables. It basically - * maps to the align attribute in most compilers, the align declspec - * in MSVC, or _Alignas/alignas in C11/C++11. - * - * Example: - * - * struct i32x4 { - * SIMDE_ALIGN_TO(16) int32_t values[4]; - * } - * - * Limitations: - * - * MSVC requires that the Alignment parameter be numeric; you can't do - * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is - * unfortunate because that's really how the LIKE macros are - * implemented, and I am not aware of a way to get anything like this - * to work without using the C11/C++11 keywords. 
- * - * It also means that we can't use SIMDE_ALIGN_CAP to limit the - * alignment to the value specified, which MSVC also requires, so on - * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. - * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, - * but should be safe to use on MSVC. - * - * All this is to say that, if you want your code to work on MSVC, you - * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of - * SIMDE_ALIGN_TO(8/16/32/64). - */ -#if \ - HEDLEY_HAS_ATTRIBUTE(aligned) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) -#elif \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) - #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) - #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) - /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); - * the alignment passed to the declspec has to be an integer. */ - #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE -#endif -#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) -#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) -#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) -#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) - -/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) - * - * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's - * std::assume_aligned, or __builtin_assume_aligned. It tells the - * compiler to assume that the provided pointer is aligned to an - * `Alignment`-byte boundary. - * - * If you define SIMDE_ALIGN_DEBUG prior to including this header then - * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
[... remainder of the vendored SIMDe headers, continued from the hunks above and deleted together with the rest of the bundled SIMDe sources. The removed span covers: the rest of simde-align.h (SIMDE_ALIGN_ASSUME_TO and its checked debug variant, SIMDE_ALIGN_LIKE, SIMDE_ALIGN_CAST, SIMDE_ALIGN_ASSUME_CAST); simde-common.h (the SIMDE_FAST_* speed/accuracy trade-off switches, detection of GCC-style vector extensions and of __builtin_shufflevector/__builtin_convertvector, the OpenMP/Cilk SIMDE_VECTORIZE pragmas, endianness detection and simde_bswap64, the simde_float32/simde_float64 and poly type definitions, portable memcpy/memset/memcmp fallbacks for freestanding builds, signaling-NaN quieting helpers, and per-compiler bug-workaround macros for GCC, Clang, MSVC, ICC, MCST LCC, and PGI); check.h and debug-trap.h (the simde_assert_* family, simde_trap, simde_dbg_assert); and the start of the x86 MMX emulation (the simde__m64 union and the native/NEON/Loongson-MMI/scalar implementations of _mm_add_pi8, _mm_add_pi16, _mm_add_pi32, _mm_adds_pi8, and _mm_adds_pu8). The deleted SIMDe content continues in the hunks below. ...]
b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) -# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_and_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - r_.i64[0] = a_.i64[0] & b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -# define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = 
simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) -# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) -# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) -# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) -# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) -# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) -# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtm64_si64 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtm64_si64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i64[0]; - #endif - #endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -# define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi32_si64 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[2] = { a, 0 }; - r_.neon_i32 = vld1_s32(av); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -# define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi64_m64 (int64_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtsi64_m64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); - #else - r_.i64[0] = a; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi64_si32 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_empty (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); - #else - /* noop */ - #endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_empty() simde_mm_empty() -# define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_or_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - r_.i64[0] = a_.i64[0] | b_.i64[0]; 
- #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -# define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to 
UINT8_MAX */ - const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); - - /* Elements which are within the acceptable range */ - const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); - const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); - - /* Final values as 16-bit integers */ - const int16x8_t values = vorrq_s16(le_max, gt_max); - - r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else if (a_.i16[i] < 0) { - r_.u8[i] = 0; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] > UINT8_MAX) { - r_.u8[i + 4] = UINT8_MAX; - } else if (b_.i16[i] < 0) { - r_.u8[i + 4] = 0; - } else { - r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) -# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i8 = vld1_s8(v); - #else - r_.i8[0] = e0; - r_.i8[1] = e1; - r_.i8[2] = e2; - r_.i8[3] = e3; - r_.i8[4] = e4; - r_.i8[5] = e5; - r_.i8[6] = e6; - r_.i8[7] = e7; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m64_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi8( - HEDLEY_STATIC_CAST(int8_t, e7), - HEDLEY_STATIC_CAST(int8_t, e6), - HEDLEY_STATIC_CAST(int8_t, e5), - HEDLEY_STATIC_CAST(int8_t, e4), - HEDLEY_STATIC_CAST(int8_t, e3), - HEDLEY_STATIC_CAST(int8_t, e2), - HEDLEY_STATIC_CAST(int8_t, e1), - HEDLEY_STATIC_CAST(int8_t, e0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u8 = vld1_u8(v); - #else - r_.u8[0] = e0; - r_.u8[1] = e1; - r_.u8[2] = e2; - r_.u8[3] = e3; - r_.u8[4] = e4; - r_.u8[5] = e5; - r_.u8[6] = e6; - r_.u8[7] = e7; - #endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi16(e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; - r_.neon_i16 = vld1_s16(v); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - #endif - - return 
simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi16( - HEDLEY_STATIC_CAST(int16_t, e3), - HEDLEY_STATIC_CAST(int16_t, e2), - HEDLEY_STATIC_CAST(int16_t, e1), - HEDLEY_STATIC_CAST(int16_t, e0) - ); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; - r_.neon_u16 = vld1_u16(v); -#else - r_.u16[0] = e0; - r_.u16[1] = e1; - r_.u16[2] = e2; - r_.u16[3] = e3; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32( - HEDLEY_STATIC_CAST(int32_t, e1), - HEDLEY_STATIC_CAST(int32_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; - r_.neon_u32 = vld1_u32(v); -#else - r_.u32[0] = e0; - r_.u32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi32 (int32_t e1, int32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32(e1, e0); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; - r_.neon_i32 = vld1_s32(v); -#else - r_.i32[0] = e0; - r_.i32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pi64 (int64_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; - r_.neon_i64 = vld1_s64(v); -#else - r_.i64[0] = e0; -#endif - - return simde__m64_from_private(r_); -} - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; - r_.neon_f32 = vld1_f32(v); -#else - r_.f32[0] = e0; - r_.f32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi8 (int8_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi8(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i8 = vmov_n_s8(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi8(a, a, a, a, a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi16 (int16_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi16(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i16 = vmov_n_s16(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi16(a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi32 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi32(a); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i32 = vmov_n_s32(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi16(e3, e2, e1, e0); - #else - return simde_mm_set_pi16(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi32 (int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi32(e1, e0); - #else - return simde_mm_set_pi32(e0, e1); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setzero_si64 (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setzero_si64(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_u32 = vmov_n_u32(0); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(0, 0); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_si64() simde_mm_setzero_si64() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_load_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_loadu_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(mem_addr, &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_setone_si64 (void) { - return simde_mm_set1_pi32(~INT32_C(0)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) 
- return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) -# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) -# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psllh_s(a_.mmi_i16, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) -# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi32(a, count); - #else - simde__m64_private r_; - 
simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) -# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_slli_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); - #else - r_.u64[0] = a_.u64[0] << count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) -# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 << count_.i64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] << count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) -# define _m_psllq(a, count) simde_mm_sll_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) - return simde_mm_setzero_si64(); - - r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { - r_.u16[i] = a_.u16[i] >> count_.u64[0]; - } - 
#endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) -# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { - r_.u32[i] = a_.u32[i] >> count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) -# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) -# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) -# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_si64(a, count); 
- #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> count; - #else - r_.u64[0] = a_.u64[0] >> count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) -# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 >> count_.u64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] >> count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) -# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrah_s(a_.mmi_i16, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) -# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psraw_s(a_.mmi_i32, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) 
[Flattened deletion hunks omitted: the remaining vendored SIMDE sources removed by this change. The deleted content comprises the remainder of simde/x86/mmx.h (MMX arithmetic-shift, subtract, saturating-subtract, unpack, xor, and to-int emulation together with their _mm_*/_m_* native-alias macros), all of simde/simde-f16.h (portable simde_float16 API selection, NaN/infinity constants, float16/float32 conversion, and classification helpers), and the opening portion of simde/x86/sse.h (the simde__m128 union and per-platform typedef, platform conversion helpers, MXCSR rounding/exception/flush-to-zero constants and accessors, simde_x_mm_round_ps, and the basic set/move/broadcast/add/and/andnot/or/xor/not/select/avg/abs/cmpeq operations).]
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpge_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpge_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpgt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpgt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vandq_u32(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpunord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpunord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] == b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] >= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] > b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] <= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] < b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] != b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { - simde__m128_private - r_, - dest_ = simde__m128_to_private(dest), - src_ = simde__m128_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); - r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t sign_pos = wasm_f32x4_splat(-0.0f); - r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); - #else - r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); - r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; - r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); - #elif defined(SIMDE_IEEE754_STORAGE) - (void) src_; - (void) dest_; - simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); - r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { - return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_pi2ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); - #else - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_si2ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - r_.i32[1] = a_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvt_ss2si (simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_ss2si(a); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); - #else - simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.i16[i]; - r_.f32[i] = v; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32x2_ps(a, b); - #else - simde__m128_private r_; - simde__m64_private - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); - SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); - #else - r_.f32[0] = (simde_float32) a_.i32[0]; - r_.f32[1] = (simde_float32) a_.i32[1]; - r_.f32[2] = (simde_float32) b_.i32[0]; - r_.f32[3] = (simde_float32) b_.i32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_cvtpi8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); - r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); - r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi16 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi16(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi32(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi8 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi8(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) - /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to - * i16, combine with an all-zero vector of i16 (which will become the upper - * half), narrow to i8. 
*/ - float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); - float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); - float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); - r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) - r_.i8[i] = INT8_MAX; - else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) - r_.i8[i] = INT8_MIN; - else - r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); - } - /* Note: the upper half is undefined */ - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.u16[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtsi32_ss(a, b); - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_ss(a, b); - #else - return _mm_cvtsi64x_ss(a, b); - #endif - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - 
return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm_cvtss_f32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtss_f32(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_f32(a_.neon_f32, 0); - #else - return a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtss_si32 (simde__m128 a) { - return simde_mm_cvt_ss2si(a); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtss_si64(a); - #else - return _mm_cvtss_si64x(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); - #else - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) -# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtt_ss2si (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtt_ss2si(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - simde_float32 v = a_.f32[0]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, v); - #endif - #endif - #endif -} -#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) -# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) - #if defined(__PGI) - return _mm_cvttss_si64x(a); - #else - return _mm_cvttss_si64(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); - float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); - r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) - r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = a_.f32[0] / b_.f32[0]; - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_mm_extract_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private a_ = simde__m64_to_private(a); - return a_.i16[imm8]; -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) -#endif -#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) -# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private - a_ = simde__m64_to_private(a); - - a_.i16[imm8] = i; - - return simde__m64_from_private(a_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) -#endif -#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps(mem_addr); -#else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); - #endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load1_ps (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps1(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_dup_f32(mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); - #else - r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ss (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ss(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); - #else - r_.f32[0] = *mem_addr; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); - #else - simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -/* The SSE documentation says that there are no alignment requirements - for mem_addr. Unfortunately they used the __m64 type for the argument - which is supposed to be 8-byte aligned, so some compilers (like clang - with -Wcast-align) will generate a warning if you try to cast, say, - a simde_float32* to a simde__m64* for this function. - - I think the choice of argument type is unfortunate, but I do think we - need to stick to it here. 
If there is demand I can always add something - like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vld1_f32( - HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); - #else - simde__m64_private b_; - simde_memcpy(&b_, mem_addr, sizeof(b_)); - r_.i32[0] = b_.i32[0]; - r_.i32[1] = b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadr_ps(mem_addr); - #else - simde__m128_private - r_, - v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrev64q_f32(v_.neon_f32); - r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_reve(v_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); - #else - r_.f32[0] = v_.f32[3]; - r_.f32[1] = v_.f32[2]; - r_.f32[2] = v_.f32[1]; - r_.f32[3] = v_.f32[0]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadu_ps(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m64_private - a_ = simde__m64_to_private(a), - mask_ = simde__m64_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) - if (mask_.i8[i] < 0) - mem_addr[i] = a_.i8[i]; - #endif -} -#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) -# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) - r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); - #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) - r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) -# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) -# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - #if defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); - #else - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); - #endif - #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); - r_.f32 = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.f32), - ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | - (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) - ) - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) -# define _m_pminub(a, b) simde_mm_min_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movehl_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vzip2q_u64(b_.neon_u64, a_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a32 = vget_high_f32(a_.neon_f32); - float32x2_t b32 = vget_high_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(b32, a32); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergel(b_.altivec_i64, a_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); - #else - r_.f32[0] = b_.f32[2]; - r_.f32[1] = b_.f32[3]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movelh_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = 
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] == b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] == b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] >= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] >= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] > b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] > b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] <= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] <= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] < b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] < b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] != b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] != b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) -# if defined(__has_builtin) -# if __has_builtin(__builtin_ia32_undef128) -# define SIMDE_HAVE_UNDEFINED128 -# endif -# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) -# define SIMDE_HAVE_UNDEFINED128 -# endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpackhi_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_high_f32(a_.neon_f32); - float32x2_t b1 = vget_high_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = 
__lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpacklo_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_low_f32(a_.neon_f32); - float32x2_t b1 = vget_low_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) || \ - defined(SIMDE_VECTOR_SUBSCRIPT)) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private a_ = simde__m64_to_private(a); - vst1_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), a_.neon_i64); - #else - simde__m64_private* - dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), - a_ = simde__m64_to_private(a); - - dest->i64[0] = a_.i64[0]; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || defined(SIMDE_LOONGARCH_LSX_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_ASSUME_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_ps(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_ps(mem_addr, a) 
simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ - row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - } while (0) -#else - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ - SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ - row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ - row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ - row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ - row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ - } while (0) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE_H) */ -/* :: End simde/x86/sse.h :: */ -#if !defined(SIMDE_X86_SSE4_1_H) -#define SIMDE_X86_SSE4_1_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/ssse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSSE3_H) -#define SIMDE_X86_SSSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSE3_H) -#define SIMDE_X86_SSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. 
Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - * 2017 Hasindu Gamaarachchi - * 2018 Jeff Daily - */ - -#if !defined(SIMDE_X86_SSE2_H) -#define SIMDE_X86_SSE2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128i n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - #if defined(__ARM_FP16_FORMAT_IEEE) - SIMDE_ALIGN_TO_16 float16x8_t neon_f16; - #endif - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - 
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128i_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128d n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 
msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128d_private; - -#if defined(SIMDE_X86_SSE2_NATIVE) - typedef __m128i simde__m128i; - typedef __m128d simde__m128d; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int64x2_t simde__m128i; -# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - typedef float64x2_t simde__m128d; -# elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -# else - typedef simde__m128d_private simde__m128d; -# endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128i; - typedef v128_t simde__m128d; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; - #else - typedef simde__m128d_private simde__m128d; - #endif -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128i_private simde__m128i; - typedef simde__m128d_private simde__m128d; -#endif - -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - typedef simde__m128i __m128i; - typedef simde__m128d __m128d; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde__m128i_from_private(simde__m128i_private v) { - simde__m128i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i_private -simde__m128i_to_private(simde__m128i v) { - simde__m128i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde__m128d_from_private(simde__m128d_private v) { - simde__m128d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d_private -simde__m128d_to_private(simde__m128d v) { - simde__m128d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, 
f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(double) - simde__m128d_to_altivec_f64(simde__m128d value) { - simde__m128d_private r_ = simde__m128d_to_private(value); - return r_.altivec_f64; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { - simde__m128d_private r_; - r_.altivec_f64 = value; - return simde__m128d_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) - #endif - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_pd(e1, e0); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make(e0, e1); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; - r_.neon_f64 = vld1q_f64(data); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_pd(a); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_pd(a) simde_mm_set1_pd(a) - #define _mm_set_pd1(a) simde_mm_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_abs_pd(simde__m128d a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - simde_float64 mask_; - uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); - simde_memcpy(&mask_, &u64_, sizeof(u64_)); - return _mm_and_pd(_mm_set1_pd(mask_), a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vabsq_f64(a_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_abs(a_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_not_pd(simde__m128d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castpd_si128(a); - return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - mask_ = simde__m128d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 + b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] + b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_add_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] + b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_move_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(HEDLEY_IBM_VERSION) - r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); - #else - r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); - #else - r_.f64[0] = b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_broadcastlow_pd(simde__m128d a) { - /* This function broadcasts the first element in the input vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_sd functions since there may be garbage in the upper lanes. 
*/ - - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[0]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_add_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] + b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] + b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_and_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_and_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_and_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_and_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_andnot_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = ~a_.u64[i] & b_.u64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_andnot_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_xor_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - 
#endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) - uint16_t wa SIMDE_VECTOR(32); - uint16_t wb SIMDE_VECTOR(32); - uint16_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) - uint32_t wa SIMDE_VECTOR(32); - uint32_t wb SIMDE_VECTOR(32); - uint32_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setzero_si128 (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_si128(); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vdupq_n_s32(0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT) - r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = 0; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} 
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_si128() (simde_mm_setzero_si128()) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_bslli_si128 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & ~15))) { - return simde_mm_setzero_si128(); - } - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) - r_.altivec_i8 = - #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - vec_slo - #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ - vec_sro - #endif - (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); - #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.u128[0] = a_.u128[0] << (imm8 * 8); - #else - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i - imm8]; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) - #define simde_mm_bslli_si128(a, imm8) \ - simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ - simde__m128i_from_wasm_v128( \ - wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ - simde__m128i_to_wasm_v128((a)), \ - ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_; \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.i8 = \ - SIMDE_SHUFFLE_VECTOR_(8, 16, \ - simde_tmp_z_.i8, \ - (simde_tmp_a_).i8, \ - HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#endif -#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) - #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_bsrli_si128 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & ~15))) { - return simde_mm_setzero_si128(); - } - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) - r_.altivec_i8 = - #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - vec_sro - #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ - vec_slo - #endif - (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int e = HEDLEY_STATIC_CAST(int, i) + imm8; - r_.i8[i] = (e < 16) ? a_.i8[e] : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) - #define simde_mm_bsrli_si128(a, imm8) \ - simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.wasm_v128 = \ - wasm_i8x16_shuffle( \ - simde_tmp_z_.wasm_v128, \ - simde_tmp_a_.wasm_v128, \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.i8 = \ - SIMDE_SHUFFLE_VECTOR_(8, 16, \ - simde_tmp_z_.i8, \ - (simde_tmp_a_).i8, \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#endif -#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) - #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_clflush (void const* p) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_clflush(p); - #else - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_clflush(p) simde_mm_clflush(p) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] == b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] >= b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] > b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] <= b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] < b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), 
- b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] != b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { - simde__m128d_private - r_, - dest_ = simde__m128d_to_private(dest), - src_ = simde__m128d_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); - #else - simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); - uint64_t u64_nz; - simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); - uint64x2_t sign_pos = vdupq_n_u64(u64_nz); - #endif - r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); - #else - r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); - #endif - #elif defined(simde_math_copysign) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); - } - #else - simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); - return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { - return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_castpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_ps(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f32_f64(a); - #else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castpd_si128 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_si128(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_s64_f64(a); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_castps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_pd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_f32(a); - #else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castps_pd(a) simde_mm_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castps_si128 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_si128(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif 
-} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castps_si128(a) simde_mm_castps_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_castsi128_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_pd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_s64(a); - #else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_castsi128_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_ps(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); - #else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = (a_.i16 == b_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_MIPS_MSA_NATIVE) - r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpgt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpge_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpge_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpngt_pd(a, b); - #else - return simde_mm_cmple_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpngt_sd(a, b); - #else - return simde_mm_cmple_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnge_pd(a, b); - #else - return simde_mm_cmplt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpnge_sd(a, b); - #else - return simde_mm_cmplt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_pd(a, b); - #else - return simde_mm_cmpge_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_sd(a, b); - #else - return simde_mm_cmpge_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnle_pd(a, b); - #else - return simde_mm_cmpgt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return 
_mm_cmpnle_sd(a, b); - #else - return simde_mm_cmpgt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vandq_u64(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm_cvtsd_f64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cvtsd_f64(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); - #else - return a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_pd(a); - #else - simde__m128d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtepi32_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_ps(a); - #else - simde__m128_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #pragma clang diagnostic ignored "-Wc11-extensions" - #endif - r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = simde_math_round(a_.f64[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) - return _mm_cvtpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvtpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) - float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; - r_.f32 = - __builtin_shufflevector( - __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, - 0, 1, 2, 3 - ); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); - r_.f32[2] = SIMDE_FLOAT32_C(0.0); - r_.f32[3] = SIMDE_FLOAT32_C(0.0); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtpi32_pd (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_pd(a); - #else - simde__m128d_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) - a_ = simde__m128_to_private(a); - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - #else - a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_pd(a); - #else - simde__m128d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 v = simde_math_round(a_.f64[0]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsd_si64x(a); - #else - return _mm_cvtsd_si64(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); - #endif -} -#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) - #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); - - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i]; - } - #endif - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_x_mm_cvtsi128_si16 (simde__m128i a) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s16(a_.neon_i16, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i16, 0); - #else - return a_.i16[0]; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi128_si32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi128_si32(a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s32(a_.neon_i32, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i32, 0); - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsi128_si64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsi128_si64x(a); - #else - return _mm_cvtsi128_si64(a); - #endif - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); - #endif - return a_.i64[0]; - #endif -} -#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) - #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_sd(a, b); - #else - simde__m128d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.i64[1] = a_.i64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cvtsi16_si128 (int16_t a) { - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); - #else - r_.i16[0] = a; - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - r_.i16[4] = 0; - r_.i16[5] = 0; - r_.i16[6] = 0; - r_.i16[7] = 0; - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi32_si128 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_si128(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_sd(a, b); - #else - return _mm_cvtsi64x_sd(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) - #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi64_si128 (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_si128(a); - #else - return _mm_cvtsi64x_si128(a); - #endif - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
[Vendored SIMDE SSE2 compatibility header deleted along with the SIMDE dependency.
 This span removes the portable simde_mm_* emulations (and their _mm_* native-alias
 macros) for the SSE2 conversion (cvtsi64/cvtss_sd/cvttpd/cvttps/cvttsd), div,
 extract/insert_epi16, load/loadu, madd_epi16, maskmoveu_si128, movemask_epi8/pd,
 min/max, move_epi64/movepi64_pi64/movpi64_epi64, mul/mulhi/mullo, or, packs/packus,
 pause, sad_epu8, and set/set1 intrinsic families, including the NEON, WASM SIMD128,
 AltiVec, and scalar fallback code paths for each.]
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_set1_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_epi64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - return simde_mm_set1_epi64x(a_.i64[0]); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu8 (uint8_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); - #else - return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu16 (uint16_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); - #else - return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu32 (uint32_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); - #else - return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_set1_epu64 (uint64_t value) { - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); - #else - return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_epi8( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - 
#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi32(e3, e2, e1, e0); - #else - return simde_mm_set_epi32(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_epi64(e1, e0); - #else - return simde_mm_set_epi64(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_pd(e1, e0); - #else - return simde_mm_set_pd(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setzero_pd (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_pd(); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); - #else - return simde_mm_castsi128_pd(simde_mm_setzero_si128()); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_pd() simde_mm_setzero_pd() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_undefined_pd (void) { - simde__m128d_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_pd(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_pd() simde_mm_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_undefined_si128 (void) { - simde__m128i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_si128(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_si128() (simde_mm_undefined_si128()) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_setone_pd (void) { - return simde_mm_castps_pd(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_setone_si128 (void) { - return simde_mm_castps_si128(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = 
simde__m128i_to_private(a); \ - simde__m128i_from_wasm_v128( \ - wasm_i32x4_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3)); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_epi32(a, imm8) \ - (__extension__ ({ \ - const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ - int32x4_t simde_mm_shuffle_epi32_r_; \ - simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ - vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - (simde_tmp_a_).i32, \ - (simde_tmp_a_).i32, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; - r_.f64[1] = ((imm8 & 2) == 0) ? 
b_.f64[0] : b_.f64[1]; - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ - simde__m128d_from_private((simde__m128d_private) { .f64 = \ - SIMDE_SHUFFLE_VECTOR_(64, 16, \ - simde__m128d_to_private(a).f64, \ - simde__m128d_to_private(b).f64, \ - (((imm8) ) & 1), \ - (((imm8) >> 1) & 1) + 2) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[i]; - } - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflehi_epi16(a, imm8) \ - (__extension__ ({ \ - int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ - simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ - wasm_i16x8_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = 
simde__m128i_to_private(a); - - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; - } - SIMDE_VECTORIZE - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) \ - simde__m128i_from_wasm_v128( \ - wasm_i16x8_shuffle( \ - simde__m128i_to_wasm_v128((a)), \ - wasm_i16x8_splat(0), \ - (((imm8) & 0x03) ), \ - (((imm8) & 0x0c) >> 2), \ - (((imm8) & 0x30) >> 4), \ - (((imm8) & 0xc0) >> 6), \ - 4, 5, 6, 7)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflelo_epi16(a, imm8) \ - (__extension__({ \ - int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ - simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), \ - 4, 5, 6, 7) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 15) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = (a_.u16 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? 
wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 31) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = (a_.u32 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 63) - return simde_mm_setzero_si128(); - - const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] << s; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsqrtq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_sqrt(a_.altivec_f64); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_sqrt) - r_.f64[0] = simde_math_sqrt(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~15) ? 15 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~31) ? 
31 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sra_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) - return _mm_sra_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - (((imm8) <= 0) ? 
\ - (a) : \ - simde__m128i_from_neon_i16( \ - ((imm8) > 15) ? \ - vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ - vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i32( \ - ((imm8) > 31) ? \ - vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ - vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sl(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 63))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i64( \ - ((imm8) > 63) ? \ - vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ - vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u16( \ - ((imm8) > 15) ? \ - vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ - vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u32( \ - ((imm8) > 31) ? \ - vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ - vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sr(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); - #else - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u64( \ - ((imm8) > 63) ? \ - vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ - vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store1_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); - #else - mem_addr[0] = a_.f64[0]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) - #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_sd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); - simde_memcpy(mem_addr, &v, sizeof(v)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); - simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde_float64 v = a_.f64[0]; - simde_memcpy(mem_addr, &v, sizeof(simde_float64)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void - simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeh_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - *mem_addr = a_.f64[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int64_t tmp; - - /* memcpy to prevent aliasing, tmp because we can't take the - * address of a vector element. */ - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - tmp = vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - tmp = vec_extract(a_.altivec_i64, 0); - #else - tmp = a_.i64[0]; - #endif - - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_pd(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 tmp; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - tmp = vgetq_lane_f64(a_.neon_f64, 0); - #else - tmp = a_.f64[0]; - #endif - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storer_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #else - mem_addr[0] = a_.f64[1]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { - #if 
[Continuation of the deleted vendored SIMDE amalgamated header: the remainder of the SSE2 portability shims (simde_mm_storeu_si16/si32/si64/si128, the stream/store variants, sub/subs, the ucomi* scalar-double comparisons, lfence/mfence, unpackhi/unpacklo, xor/not and the deinterleave helpers), the complete SSE3 section (addsub, hadd/hsub, lddqu, loaddup, movedup, movehdup/moveldup), and the opening of the SSSE3 section (abs, alignr, shuffle_epi8, hadd/hadds/hsub). All of this third-party code is removed along with the bundled SIMDE/SSE dependency, which the package no longer needs now that the sampler uses Armadillo; the deleted hunk continues below.]
a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[0] - a_.i32[1]; - r_.i32[1] = b_.i32[0] - b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsubs_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); - #else - return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); - #else - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); - r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_maddubs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Zero extend a */ - int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); - int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); - - /* Sign extend by shifting left then shifting right. */ - int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); - int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); - - /* multiply */ - int16x8_t prod1 = vmulq_s16(a_even, b_even); - int16x8_t prod2 = vmulq_s16(a_odd, b_odd); - - /* saturated add */ - r_.neon_i16 = vqaddq_s16(prod1, prod2); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_maddubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); - int16x8_t bi = vmovl_s8(b_.neon_i8); - int16x8_t p = vmulq_s16(ai, bi); - int16x4_t l = vget_low_s16(p); - int16x4_t h = vget_high_s16(p); - r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_mulhrs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), - vget_low_s16(b_.neon_i16)); - int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), - vget_high_s16(b_.neon_i16)); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); - int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); - - /* Join together */ - r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); - v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); - const v128_t __inc = wasm_i32x4_splat(0x4000); - __lo = wasm_i32x4_add(__lo, __inc); - __hi = wasm_i32x4_add(__hi, __inc); - __lo = wasm_i32x4_add(__lo, __lo); - __hi = wasm_i32x4_add(__hi, __hi); - r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhrs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - 
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow = vrshrn_n_s32(mul, 15); - - /* Join together */ - r_.neon_i16 = narrow; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); - uint8x16_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s8(b_.neon_i8); - #else - bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); - #endif - bnz_mask = vmvnq_u8(bnz_mask); - - r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); - simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); - uint16x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s16(b_.neon_i16); - #else - bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); - #endif - bnz_mask = vmvnq_u16(bnz_mask); - - r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); - simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); - uint32x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s32(b_.neon_i32); - #else - bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); - #endif - bnz_mask = vmvnq_u32(bnz_mask); - - r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); - simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); - uint8x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s8(b_.neon_i8); - #else - bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); - #endif - bnz_mask = vmvn_u8(bnz_mask); - - r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); - uint16x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s16(b_.neon_i16); - #else - bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); - #endif - bnz_mask = vmvn_u16(bnz_mask); - - r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); - uint32x2_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s32(b_.neon_i32); - #else - bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); - #endif - bnz_mask = vmvn_u32(bnz_mask); - - r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/ssse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) -# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_epi16(a, b, imm8) \ - (__extension__ ({ \ - simde__m128i_private \ - simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ - simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ - simde_mm_blend_epi16_r_; \ - \ - simde_mm_blend_epi16_r_.i16 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 16, 16, \ - simde_mm_blend_epi16_a_.i16, \ - simde_mm_blend_epi16_b_.i16, \ - ((imm8) & (1 << 0)) ? 8 : 0, \ - ((imm8) & (1 << 1)) ? 9 : 1, \ - ((imm8) & (1 << 2)) ? 10 : 2, \ - ((imm8) & (1 << 3)) ? 11 : 3, \ - ((imm8) & (1 << 4)) ? 12 : 4, \ - ((imm8) & (1 << 5)) ? 13 : 5, \ - ((imm8) & (1 << 6)) ? 14 : 6, \ - ((imm8) & (1 << 7)) ? 15 : 7 \ - ); \ - \ - simde__m128i_from_private(simde_mm_blend_epi16_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_epi16 - #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; - } - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_pd(a, b, imm8) \ - (__extension__ ({ \ - simde__m128d_private \ - simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ - simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ - simde_mm_blend_pd_r_; \ - \ - simde_mm_blend_pd_r_.f64 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 64, 16, \ - simde_mm_blend_pd_a_.f64, \ - simde_mm_blend_pd_b_.f64, \ - ((imm8) & (1 << 0)) ? 2 : 0, \ - ((imm8) & (1 << 1)) ? 3 : 1 \ - ); \ - \ - simde__m128d_from_private(simde_mm_blend_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_pd - #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_ps(a, b, imm8) \ - (__extension__ ({ \ - simde__m128_private \ - simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ - simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ - simde_mm_blend_ps_r_; \ - \ - simde_mm_blend_ps_r_.f32 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 32, 16, \ - simde_mm_blend_ps_a_.f32, \ - simde_mm_blend_ps_b_.f32, \ - ((imm8) & (1 << 0)) ? 4 : 0, \ - ((imm8) & (1 << 1)) ? 5 : 1, \ - ((imm8) & (1 << 2)) ? 6 : 2, \ - ((imm8) & (1 << 3)) ? 
7 : 3 \ - ); \ - \ - simde__m128_from_private(simde_mm_blend_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_ps - #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_epi8(a, b, mask); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); - return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Use a signed shift right to create a mask with the sign bit */ - mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); - r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); - #else - mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; - #endif - - r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int8_t m = mask_.i8[i] >> 7; - r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_epi8 - #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE2_NATIVE) - mask = simde_mm_srai_epi16(mask, 15); - return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); - r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i16 = mask_.i16 < z; - #else - mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; - #endif - - r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int16_t m = mask_.i16[i] >> 15; - r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; - mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); - #else - mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; - #endif - - r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - int32_t m = mask_.i32[i] >> 31; - r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i64) z = { 0, 0 }; - mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); - #else - mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; - #endif - - r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - int64_t m = mask_.i64[i] >> 63; - r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_pd - #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_ps - #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_pd (simde__m128d a, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - /* For architectures which lack a current direction SIMD instruction. */ - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndiq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyint) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_nearbyint(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndaq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_roundeven) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_roundeven(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndmq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_floor(a_.f64[i]); - } - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndpq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); - #elif defined(simde_math_ceil) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_ceil(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_trunc(a_.f64[i]); - } - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_pd - #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_pd - #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ps - #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_sd - #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_ss(a, 
b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ss - #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cmpeq_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ - uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); - uint32x4_t swapped = vrev64q_u32(cmp); - r_.neon_u32 = vandq_u32(cmp, swapped); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpeq_epi64 - #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_i16 = s16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, 0, -1, 1, -1, 2, -1, 3, - -1, 4, -1, 5, -1, 6, -1, 7)); - r_.i16 >>= 8; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi16 - #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) 
- __m128i tmp = _mm_unpacklo_epi8(a, a); - tmp = _mm_unpacklo_epi16(tmp, tmp); - return _mm_srai_epi32(tmp, 24); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ - r_.neon_i32 = s32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, 0, -1, -1, -1, 1, - -1, -1, -1, 2, -1, -1, -1, 3)); - r_.i32 >>= 24; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi32 - #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); - r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - /* Disabled on x86 due to lack of 64-bit arithmetic shift until - * until AVX-512 (at which point we would be using the native - * _mm_cvtepi_epi64 anyways). 
*/ - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, -1, -1, -1, -1, 0, - -1, -1, -1, -1, -1, -1, -1, 1)); - r_.i64 >>= 56; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi64 - #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_u16 = u16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 16, 1, 17, 2, 18, 3, 19, - 4, 20, 5, 21, 6, 22, 7, 23)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi16 - #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi32(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ - r_.neon_u32 = u32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 
18, 19, 1, 21, 22, 23, - 2, 25, 26, 27, 3, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi32 - #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi64(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 18, 19, 20, 21, 22, 23, - 1, 25, 26, 27, 28, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi64 - #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); - r_.i32 >>= 16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi32 - #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 1, 11, 2, 13, 3, 15)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi32 - #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 10, 11, - 1, 13, 14, 15)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi64 - #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, - 8, 9, 10, 0, - 12, 13, 14, 1)); - r_.i64 >>= 48; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi64 - #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i tmp = _mm_shuffle_epi32(a, 0x50); - tmp = _mm_srai_epi32(tmp, 31); - tmp = _mm_shuffle_epi32(tmp, 0xed); - return _mm_unpacklo_epi32(a, tmp); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); - r_.i64 >>= 32; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi32_epi64 - #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); - #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u32) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu32_epi64 - #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - - switch (imm8) { - case 0xff: - r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); - break; - case 0x13: - r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); - break; - default: - { /* imm8 is a compile-time constant, so this all becomes just a load */ - uint64_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - - r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); - - { - uint64_t mask_data[] = { - (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - break; - } - #else - simde_float64 sum = SIMDE_FLOAT64_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; - } - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_pd - #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - - switch (imm8) { - case 0xff: - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - case 0x7f: - r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - default: - { - { - uint32_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - - { - uint32_t mask_data[] = { - (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - } - break; - } - #else - simde_float32 sum = SIMDE_FLOAT32_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); - } - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(HEDLEY_MCST_LCC_VERSION) - #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ - SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ - _mm_dp_ps((a), (b), (imm8)); \ - SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ - })) - #else - #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_ps - #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) -#endif - -#if defined(simde_mm_extract_epi8) -# undef simde_mm_extract_epi8 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_mm_extract_epi8 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i8, imm8); - #else - return a_.i8[imm8 & 15]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) -# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi8 - #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) -#endif - -#if defined(simde_mm_extract_epi32) -# undef simde_mm_extract_epi32 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i32, imm8); - #else - return a_.i32[imm8 & 3]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi32 - #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) -#endif - -#if defined(simde_mm_extract_epi64) -# undef simde_mm_extract_epi64 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_extract_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i64, imm8); - #else - return a_.i64[imm8 & 1]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) -#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_extract_epi64 - #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) -#endif - -#if defined(simde_mm_extract_ps) -# undef simde_mm_extract_ps -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128_private - a_ = simde__m128_to_private(a); - - return a_.i32[imm8 & 3]; -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_ps - #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_pd - #define _mm_floor_pd(a) simde_mm_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ps - #define _mm_floor_ps(a) simde_mm_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_floor) - r_.f64[0] = simde_math_floor(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_sd - #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_floor_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_floorf) - r_.f32[0] = simde_math_floorf(b_.f32[0]); - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ss - #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - /* clang-3.8 returns an incompatible type, so we need the cast. MSVC - * can't handle the cast ("error C2440: 'type cast': cannot convert - * from '__m128i' to '__m128i'"). */ - #if defined(__clang__) - #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) - #else - #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi8 - #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(__clang__) - #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) - #else - #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi32 - #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - #if defined(SIMDE_BUG_GCC_94482) - simde__m128i_private - a_ = simde__m128i_to_private(a); - - switch(imm8) { - case 0: - return simde_mm_set_epi64x(a_.i64[1], i); - break; - case 1: - return simde_mm_set_epi64x(i, a_.i64[0]); - break; - default: - HEDLEY_UNREACHABLE(); - break; - } - #else - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i64[imm8] = i; - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_insert_epi64 - #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - float tmp1_ = b_.f32[(imm8 >> 6) & 3]; - a_.f32[(imm8 >> 4) & 3] = tmp1_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_ps - #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi8(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi8 - #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi32(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi32 - #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_add_epi16(b, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu16 - #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu32 - #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi8 - #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi32 - #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu16 - #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu32 - #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_minpos_epu16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_minpos_epu16(a); - #else - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()), - a_ = simde__m128i_to_private(a); - - r_.u16[0] = UINT16_MAX; - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - if (a_.u16[i] < r_.u16[0]) { - r_.u16[0] = a_.u16[i]; - r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); - } - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_minpos_epu16 - #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - const int a_offset = imm8 & 4; - const int b_offset = (imm8 & 3) << 2; - -#if defined(simde_math_abs) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { - r_.u16[i] = - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) -# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mpsadbw_epu8 - #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mul_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // vmull_s32 upcasts instead of masking, so we downcast. 
- int32x2_t a_lo = vmovn_s64(a_.neon_i64); - int32x2_t b_lo = vmovn_s64(b_.neon_i64); - r_.neon_i64 = vmull_s32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make( - wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), - wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = - HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * - HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mul_epi32 - #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mullo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mullo_epi32 - #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 * b_.u32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] * b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_packus_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i max = _mm_set1_epi32(UINT16_MAX); - const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); - const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); - return - _mm_packs_epi32( - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) - ); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); - #else - r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = - 
vcombine_u16( - vqmovun_s32(a_.neon_i32), - vqmovun_s32(b_.neon_i32) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - - v &= ~(v >> 31); - v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_packus_epi32 - #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyint) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f64[0] = simde_math_nearbyint(b_.f64[0]); - break; - #endif - - #if defined(simde_math_floor) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f64[0] = simde_math_floor(b_.f64[0]); - break; - #endif - - #if defined(simde_math_ceil) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f64[0] = simde_math_ceil(b_.f64[0]); - break; - #endif - - #if defined(simde_math_trunc) - case SIMDE_MM_FROUND_TO_ZERO: - r_.f64[0] = simde_math_trunc(b_.f64[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) -# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_sd - #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128_private - r_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyintf) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_floorf) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f32[0] = simde_math_floorf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_ceilf) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f32[0] = simde_math_ceilf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_truncf) - case SIMDE_MM_FROUND_TO_ZERO: - 
r_.f32[0] = simde_math_truncf(b_.f32[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_ss - #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_load) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - return __builtin_nontemporal_load(mem_addr); - #else - return simde_mm_load_si128(mem_addr); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_stream_load_si128 - #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_ones (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_ones(a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; - #else - int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(&:r_) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r_ &= a_.i32f[i]; - } - - r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_ones - #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_zeros(a, mask); - #else - simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; - #else - int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(|:r_) - for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { - r_ |= tmp_.i32f[i]; - } - - r = !r_; - #endif - - return r; - #endif -} 
-#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_zeros - #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_mix_ones_zeros(a, mask); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); - int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); - return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); - long long c0 = wasm_i64x2_extract_lane(m, 0); - long long c1 = wasm_i64x2_extract_lane(m, 1); - long long ones = c0 | c1; - long long zeros = ~(c0 & c1); - return ones && zeros; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) - if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) - return 1; - - return 0; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_mix_ones_zeros - #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #else - int_fast32_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= ~a_.i32f[i] & b_.i32f[i]; - } - - return HEDLEY_STATIC_CAST(int, !r); - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_si128 - #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testnzc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); - int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !( !(vgetq_lane_s64(s641, 0) || vgetq_lane_s64(s641, 1)) \ - || !(vgetq_lane_s64(s640, 0) || vgetq_lane_s64(s640, 1)) ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ - && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) - return 1; - } - - return 0; - #endif - #endif -} -#if 
defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_si128 - #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testz_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #elif defined(SIMDE_HAVE_INT128_) - if ((a_.u128[0] & b_.u128[0]) == 0) { - return 1; - } - return 0; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if ((a_.u64[i] & b_.u64[i]) > 0) - return 0; - } - #endif - - return 1; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_si128 - #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_1_H) */ -/* :: End simde/x86/sse4.1.h :: */ diff --git a/src/simde/x86/sse4.2.h b/src/simde/x86/sse4.2.h deleted file mode 100644 index dbd82e3eb..000000000 --- a/src/simde/x86/sse4.2.h +++ /dev/null @@ -1,28148 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_SSE4_2_H) -#define SIMDE_X86_SSE4_2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.1.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. 
Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - */ - -#if !defined(SIMDE_X86_SSE_H) -#define SIMDE_X86_SSE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/mmx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_MMX_H) -#define SIMDE_X86_MMX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-common.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_COMMON_H) -#define SIMDE_COMMON_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/hedley.h :: */ -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . 
- * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) -#if defined(HEDLEY_VERSION) -# undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 16 - -#if defined(HEDLEY_STRINGIFY_EX) -# undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -# undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -# undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(HEDLEY_CONCAT) -# undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) - -#if defined(HEDLEY_CONCAT3_EX) -# undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(HEDLEY_CONCAT3) -# undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(HEDLEY_VERSION_ENCODE) -# undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -# undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -# undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -# undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -# undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -# undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -# undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -# undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(HEDLEY_INTEL_VERSION) -# undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -# undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -# undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) -# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -# undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -# undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -# undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -# undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -# undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
-[... remainder of the vendored simde/hedley.h header deleted here: the per-compiler HEDLEY_*_VERSION / HEDLEY_*_VERSION_CHECK detection macros (Emscripten, ARM, IBM XL, TI, Cray, IAR, TinyCC, DMC, CompCert, Pelles, MCST LCC, GCC), the __has_attribute / __has_cpp_attribute / __has_builtin / __has_feature / __has_extension / __has_declspec_attribute / __has_warning wrappers, HEDLEY_PRAGMA and the diagnostic push/pop and warning-suppression macros, the cast helpers (HEDLEY_CONST_CAST, HEDLEY_STATIC_CAST, etc.), and the portable attribute shims (deprecated, warn_unused_result, noreturn, unreachable/assume, likely/unlikely, malloc, pure, const, restrict, inline/always_inline/never_inline, visibility, nothrow, fallthrough, static_assert, message/warning, require, flags), through the "End simde/hedley.h" marker, the SIMDE_VERSION_* defines, and the beginning of the embedded simde headers ...]
simde/simde-detect-clang.h :: */ -/* Detect Clang Version - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -/* This file was originally part of SIMDe - * (). You're free to do with it as - * you please, but I do have a few small requests: - * - * * If you make improvements, please submit them back to SIMDe - * (at ) so others can - * benefit from them. - * * Please keep a link to SIMDe intact so people know where to submit - * improvements. - * * If you expose it publicly, please change the SIMDE_ prefix to - * something specific to your project. - * - * The version numbers clang exposes (in the ___clang_major__, - * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. - * Vendors such as Apple will define these values to their version - * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but - * __clang_major__ and __clang_minor__ are defined to 4 and 0 - * respectively, instead of 3 and 1. - * - * The solution is *usually* to use clang's feature detection macros - * () - * to determine if the feature you're interested in is available. This - * generally works well, and it should probably be the first thing you - * try. Unfortunately, it's not possible to check for everything. In - * particular, compiler bugs. - * - * This file just uses the feature checking macros to detect features - * added in specific versions of clang to identify which version of - * clang the compiler is based on. - * - * Right now it only goes back to 3.6, but I'm happy to accept patches - * to go back further. And, of course, newer versions are welcome if - * they're not already present, and if you find a way to detect a point - * release that would be great, too! - */ - -#if !defined(SIMDE_DETECT_CLANG_H) -#define SIMDE_DETECT_CLANG_H 1 - -/* Attempt to detect the upstream clang version number. I usually only - * worry about major version numbers (at least for 4.0+), but if you - * need more resolution I'm happy to accept patches that are able to - * detect minor versions as well. That said, you'll probably have a - * hard time with detection since AFAIK most minor releases don't add - * anything we can detect. Updated based on - * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 - * - would welcome patches/updates there as well. 
- */ - -#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -# if __has_attribute(unsafe_buffer_usage) // no new warnings in 17.0 -# define SIMDE_DETECT_CLANG_VERSION 170000 -# elif __has_attribute(nouwtable) // no new warnings in 16.0 -# define SIMDE_DETECT_CLANG_VERSION 160000 -# elif __has_warning("-Warray-parameter") -# define SIMDE_DETECT_CLANG_VERSION 150000 -# elif __has_warning("-Wbitwise-instead-of-logical") -# define SIMDE_DETECT_CLANG_VERSION 140000 -# elif __has_warning("-Waix-compat") -# define SIMDE_DETECT_CLANG_VERSION 130000 -# elif __has_warning("-Wformat-insufficient-args") -# define SIMDE_DETECT_CLANG_VERSION 120000 -# elif __has_warning("-Wimplicit-const-int-float-conversion") -# define SIMDE_DETECT_CLANG_VERSION 110000 -# elif __has_warning("-Wmisleading-indentation") -# define SIMDE_DETECT_CLANG_VERSION 100000 -# elif defined(__FILE_NAME__) -# define SIMDE_DETECT_CLANG_VERSION 90000 -# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) -# define SIMDE_DETECT_CLANG_VERSION 80000 -// For reasons unknown, Xcode 10.3 (Apple LLVM version 10.0.1) is apparently -// based on Clang 7, but does not support the warning we test. -// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and -// https://trac.macports.org/wiki/XcodeVersionInfo. -# elif __has_warning("-Wc++98-compat-extra-semi") || \ - (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) -# define SIMDE_DETECT_CLANG_VERSION 70000 -# elif __has_warning("-Wpragma-pack") -# define SIMDE_DETECT_CLANG_VERSION 60000 -# elif __has_warning("-Wbitfield-enum-conversion") -# define SIMDE_DETECT_CLANG_VERSION 50000 -# elif __has_attribute(diagnose_if) -# define SIMDE_DETECT_CLANG_VERSION 40000 -# elif __has_warning("-Wcomma") -# define SIMDE_DETECT_CLANG_VERSION 39000 -# elif __has_warning("-Wdouble-promotion") -# define SIMDE_DETECT_CLANG_VERSION 38000 -# elif __has_warning("-Wshift-negative-value") -# define SIMDE_DETECT_CLANG_VERSION 37000 -# elif __has_warning("-Wambiguous-ellipsis") -# define SIMDE_DETECT_CLANG_VERSION 36000 -# else -# define SIMDE_DETECT_CLANG_VERSION 1 -# endif -#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ - -/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty - * straightforward; it returns true if the compiler is a derivative - * of clang >= the specified version. - * - * Since this file is often (primarily?) useful for working around bugs - * it is also helpful to have a macro which returns true if only if the - * compiler is a version of clang *older* than the specified version to - * make it a bit easier to ifdef regions to add code for older versions, - * such as pragmas to disable a specific warning. 
*/ - -#if defined(SIMDE_DETECT_CLANG_VERSION) -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) -#else -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) -#endif - -#endif /* !defined(SIMDE_DETECT_CLANG_H) */ -/* :: End simde/simde-detect-clang.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-arch.h :: */ -/* Architecture detection - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - * Different compilers define different preprocessor macros for the - * same architecture. This is an attempt to provide a single - * interface which is usable on any compiler. - * - * In general, a macro named SIMDE_ARCH_* is defined for each - * architecture the CPU supports. When there are multiple possible - * versions, we try to define the macro to the target version. For - * example, if you want to check for i586+, you could do something - * like: - * - * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) - * ... - * #endif - * - * You could also just check that SIMDE_ARCH_X86 >= 5 without checking - * if it's defined first, but some compilers may emit a warning about - * an undefined macro being used (e.g., GCC with -Wundef). - * - * This was originally created for SIMDe - * (hence the prefix), but this - * header has no dependencies and may be used anywhere. It is - * originally based on information from - * , though it - * has been enhanced with additional information. - * - * If you improve this file, or find a bug, please file the issue at - * . If you copy this into - * your project, even if you change the prefix, please keep the links - * to SIMDe intact so others know where to report issues, submit - * enhancements, and find the latest version. 
*/ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -# if defined(__alpha_ev6__) -# define SIMDE_ARCH_ALPHA 6 -# elif defined(__alpha_ev5__) -# define SIMDE_ARCH_ALPHA 5 -# elif defined(__alpha_ev4__) -# define SIMDE_ARCH_ALPHA 4 -# else -# define SIMDE_ARCH_ALPHA 1 -# endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -# define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -# define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -# if !defined(_M_ARM64EC) -# define SIMDE_ARCH_AMD64 1000 -# endif -#endif - -/* ARM - */ -#if defined(__ARM_ARCH) -# if __ARM_ARCH > 100 -# define SIMDE_ARCH_ARM (__ARM_ARCH) -# else -# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) -# endif -#elif defined(_M_ARM) -# if _M_ARM > 100 -# define SIMDE_ARCH_ARM (_M_ARM) -# else -# define SIMDE_ARCH_ARM (_M_ARM * 100) -# endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_ARM 800 -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -# define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) -#else -# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -# define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -# elif defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -# endif -#endif -#if defined(__ARM_FEATURE_SVE) -# define SIMDE_ARCH_ARM_SVE -#endif -#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA -# define SIMDE_ARCH_ARM_FMA -#endif -#if defined(__ARM_FEATURE_CRYPTO) -# define SIMDE_ARCH_ARM_CRYPTO -#endif -#if defined(__ARM_FEATURE_QRDMX) -# define SIMDE_ARCH_ARM_QRDMX -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -# define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -# define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) -# define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -# define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -# define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -# define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -# define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -# define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -# define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -# define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -# define 
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
*/ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. 
*/ -#if \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ -#endif - -/* This warning needs a lot of work. It is triggered if all you do is - * pass the value to memcpy/__builtin_memcpy, or if you initialize a - * member of the union, even if that member takes up the entire union. - * Last tested with clang-10, hopefully things will improve in the - * future; if clang fixes this I'd love to enable it. */ -#if \ - HEDLEY_HAS_WARNING("-Wconditional-uninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ -#endif - -/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which - * will is false. However, SIMDe uses these operations exclusively - * for things like _mm_cmpeq_ps, for which we really do want to check - * for equality (or inequality). - * - * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro - * which just wraps a check in some code do disable this diagnostic I'd - * be happy to accept it. */ -#if \ - HEDLEY_HAS_WARNING("-Wfloat-equal") || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ -#endif - -/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. - * If Hedley can't find an implementation it will preprocess to - * nothing, which means there will be a trailing semi-colon. */ -#if HEDLEY_HAS_WARNING("-Wextra-semi") - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") -#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ -#endif - -/* We do use a few variadic macros, which technically aren't available - * until C99 and C++11, but every compiler I'm aware of has supported - * them for much longer. That said, usage is isolated to the test - * suite and compilers known to support them. */ -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) - #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#endif - -/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro - * before we can access certain SIMD intrinsics, but this diagnostic - * warns about it being a reserved name. It is a reserved name, but - * it's reserved for the compiler and we are using it to convey - * information to the compiler. - * - * This is also used when enabling native aliases since we don't get to - * choose the macro names. 
*/ -#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#endif - -/* Similar to above; types like simde__m128i are reserved due to the - * double underscore, but we didn't choose them, Intel did. */ -#if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ -#endif - -/* clang 3.8 warns about the packed attribute being unnecessary when - * used in the _mm_loadu_* functions. That *may* be true for version - * 3.8, but for later versions it is crucial in order to make unaligned - * access safe. */ -#if HEDLEY_HAS_WARNING("-Wpacked") - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ -#endif - -/* Triggered when assigning a float to a double implicitly. We use - * explicit casts in SIMDe, this is only used in the test suite. */ -#if HEDLEY_HAS_WARNING("-Wdouble-promotion") - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -/* Several compilers treat conformant array parameters as VLAs. We - * test to make sure we're in C mode (C++ doesn't support CAPs), and - * that the version of the standard supports CAPs. We also reject - * some buggy compilers like MSVC (the logic is in Hedley if you want - * to take a look), but with certain warnings enabled some compilers - * still like to emit a diagnostic. */ -#if HEDLEY_HAS_WARNING("-Wvla") - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ -#endif - -/* If you add an unused attribute to a function and don't use it, clang - * may emit this. 
*/ -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpass-failed") - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpadded") - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ -#endif - -#if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ -#endif - -#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ -#endif - -/* clang will emit this warning when we use C99 extensions whan not in - * C99 mode, even though it does support this. In such cases we check - * the compiler and version first, so we know it's not a problem. */ -#if HEDLEY_HAS_WARNING("-Wc99-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -#endif - -/* Similar problm as above; we rely on some basic C99 support, but clang - * has started warning obut this even in C17 mode with -Weverything. */ -#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ -#endif - -/* https://github.com/simd-everywhere/simde/issues/277 */ -#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ -#endif - -/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS - * to silence, but you have to do that before including anything and - * that would require reordering includes. */ -#if defined(_MSC_VER) - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ -#endif - -/* Some compilers, such as clang, may use `long long` for 64-bit - * integers, but `long long` triggers a diagnostic with - * -Wc++98-compat-pedantic which says 'long long' is incompatible with - * C++98. 
*/ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ - _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ -#endif - -/* Some problem as above */ -#if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ -#endif - -/* emscripten emits this whenever stdin/stdout/stderr is used in a - * macro. */ -#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ -#endif - -/* Clang uses C11 generic selections to implement some AltiVec - * functions, which triggers this diagnostic when not compiling - * in C11 mode */ -#if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ -#endif - -/* Clang sometimes triggers this warning in macros in the AltiVec and - * NEON headers, or due to missing functions. */ -#if HEDLEY_HAS_WARNING("-Wvector-conversion") - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") - /* For NEON, the situation with -Wvector-conversion in clang < 10 is - * bad enough that we just disable the warning altogether. On x86, - * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ - #if \ - (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ - SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ -#endif - -/* Prior to 5.0, clang didn't support disabling diagnostics in - * statement exprs. As a result, some macros we use don't - * properly silence warnings. 
*/ -#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ -#endif - -/* SLEEF triggers this a *lot* in their headers */ -#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#endif - -/* GCC emits this under some circumstances when using __int128 */ -#if HEDLEY_GCC_VERSION_CHECK(4,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -#endif - -/* MSVC doesn't like (__assume(0), code) and will warn about code being - * unreachable, but we want it there because not all compilers - * understand the unreachable macro and will complain if it is missing. - * I'm planning on adding a new macro to Hedley to handle this a bit - * more elegantly, but until then... */ -#if defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) -#elif defined(__clang__) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ -#endif - -/* This is a false positive from GCC in a few places. */ -#if HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif - -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#else - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ -#endif - -/* Some native functions on E2K with instruction set < v6 are declared - * as deprecated due to inefficiency. Still they are more efficient - * than SIMDe implementation. So we're using them, and switching off - * these deprecation warnings. 
*/ -#if defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") -#else -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS -#endif - -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ - HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ - SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ - SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ - SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ - SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ - SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ - SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ - -#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ -/* :: End simde/simde-diagnostic.h :: */ - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SVML) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) - #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) - #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BITALG) - #define SIMDE_X86_AVX512BITALG_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI) - #define SIMDE_X86_AVX512VBMI_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI2) - #define SIMDE_X86_AVX512VBMI2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VNNI) - #define SIMDE_X86_AVX512VNNI_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) - #define SIMDE_X86_AVX5124VNNIW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512CD) - #define SIMDE_X86_AVX512CD_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512DQ) - #define SIMDE_X86_AVX512DQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VL) - #define SIMDE_X86_AVX512VL_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BW) - #define SIMDE_X86_AVX512BW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && !defined(SIMDE_X86_AVX512FP16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512FP16) - #define SIMDE_X86_AVX512FP16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BF16) - #define SIMDE_X86_AVX512BF16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512F) - #define SIMDE_X86_AVX512F_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_NATIVE -#endif - -#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_FMA) - #define SIMDE_X86_FMA_NATIVE - #endif -#endif -#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX2) - #define SIMDE_X86_AVX2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX) - #define SIMDE_X86_AVX_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_XOP) - #define SIMDE_X86_XOP_NATIVE - #endif -#endif -#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_2) - #define SIMDE_X86_SSE4_2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_1) - #define SIMDE_X86_SSE4_1_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSSE3) - #define SIMDE_X86_SSSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE3) - #define SIMDE_X86_SSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_AES_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AES) - #define SIMDE_X86_AES_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE2) - #define SIMDE_X86_SSE2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE) - #define SIMDE_X86_SSE_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_MMX) - #define SIMDE_X86_MMX_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_GFNI) - #define SIMDE_X86_GFNI_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_PCLMUL) - #define SIMDE_X86_PCLMUL_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) - #define SIMDE_X86_VPCLMULQDQ_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_F16C) - #define SIMDE_X86_F16C_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86) && 
(defined(__INTEL_COMPILER) || (HEDLEY_MSVC_VERSION_CHECK(14, 20, 0) && !defined(__clang__))) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(push) - #pragma warning(disable:4799) -#endif - -#if \ - defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) || defined(SIMDE_X86_SVML_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE4_1_NATIVE) - #include -#elif defined(SIMDE_X86_SSSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE3_NATIVE) - #include -#elif defined(SIMDE_X86_SSE2_NATIVE) - #include -#elif defined(SIMDE_X86_SSE_NATIVE) - #include -#elif defined(SIMDE_X86_MMX_NATIVE) - #include -#endif - -#if defined(SIMDE_X86_XOP_NATIVE) - #if defined(_MSC_VER) - #include - #else - #include - #endif -#endif - -#if defined(SIMDE_X86_AES_NATIVE) - #include -#endif - -#if defined(HEDLEY_MSVC_VERSION) - #pragma warning(pop) -#endif - -#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) - #define SIMDE_ARM_NEON_A64V8_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) - #define SIMDE_ARM_NEON_A32V8_NATIVE - #endif -#endif -#if defined(__ARM_ACLE) - #include -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) - #define SIMDE_ARM_NEON_A32V7_NATIVE - #endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include - #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) - #include - #endif -#endif - -#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_SVE) - #define SIMDE_ARM_SVE_NATIVE - #include - #endif -#endif - -#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_SIMD128) - #define SIMDE_WASM_SIMD128_NATIVE - #endif -#endif - -#if !defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) && !defined(SIMDE_WASM_RELAXED_SIMD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_WASM_RELAXED_SIMD) - #define SIMDE_WASM_RELAXED_SIMD_NATIVE - #endif -#endif -#if defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) - #include -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) - #define SIMDE_POWER_ALTIVEC_P9_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) - #define SIMDE_POWER_ALTIVEC_P8_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) - #define 
SIMDE_POWER_ALTIVEC_P7_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) - #define SIMDE_POWER_ALTIVEC_P7_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) - #define SIMDE_POWER_ALTIVEC_P6_NATIVE - #endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) - #define SIMDE_POWER_ALTIVEC_P5_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_15_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_14_NATIVE - #endif -#endif - -#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) - #define SIMDE_ZARCH_ZVECTOR_13_NATIVE - #endif -#endif - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - /* AltiVec conflicts with lots of stuff. The bool keyword conflicts - * with the bool keyword in C++ and the bool macro in C99+ (defined - * in stdbool.h). The vector keyword conflicts with std::vector in - * C++ if you are `using std;`. - * - * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` - * instead, but altivec.h will unconditionally define - * `vector`/`bool`/`pixel` so we need to work around that. - * - * Unfortunately this means that if your code uses AltiVec directly - * it may break. If this is the case you'll want to define - * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even - * better, port your code to use the double-underscore versions. */ - #if defined(bool) - #undef bool - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #include - - #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #if defined(vector) - #undef vector - #endif - #if defined(pixel) - #undef pixel - #endif - #if defined(bool) - #undef bool - #endif - #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #include - #endif - - /* Use these intsead of vector/pixel/bool in SIMDe. 
*/ - #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T - #define SIMDE_POWER_ALTIVEC_PIXEL __pixel - #define SIMDE_POWER_ALTIVEC_BOOL __bool - - /* Re-define bool if we're using stdbool.h */ - #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) - #define bool _Bool - #endif -#endif - -#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) - #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_MIPS_MSA) - #define SIMDE_MIPS_MSA_NATIVE 1 - #endif -#endif -#if defined(SIMDE_MIPS_MSA_NATIVE) - #include -#endif - -/* This is used to determine whether or not to fall back on a vector - * function in an earlier ISA extensions, as well as whether - * we expected any attempts at vectorization to be fruitful or if we - * expect to always be running serial code. - * - * Note that, for some architectures (okay, *one* architecture) there - * can be a split where some types are supported for one vector length - * but others only for a shorter length. Therefore, it is possible to - * provide separate values for float/int/double types. */ - -#if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (512) - #elif defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (256) - #elif defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) - #elif \ - defined(SIMDE_X86_SSE2_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || \ - defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ - defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_NATURAL_VECTOR_SIZE (128) - #elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) - #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) - #endif - - #if !defined(SIMDE_NATURAL_VECTOR_SIZE) - #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE - #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE - #else - #define SIMDE_NATURAL_VECTOR_SIZE (0) - #endif - #endif - - #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) - #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) - #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif - #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) - #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE - #endif -#endif - -#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) 
((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) - -/* Native aliases */ -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_FMA_NATIVE) - #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VL_NATIVE) - #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) - #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) - #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BW_NATIVE) - #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) - #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) - #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BF16_NATIVE) - #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) - #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) - #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) - #define SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512DQ_NATIVE) - #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512CD_NATIVE) - #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AVX512FP16_NATIVE) - #define SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_GFNI_NATIVE) - #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_PCLMUL_NATIVE) - #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) - #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_F16C_NATIVE) - #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_AES_NATIVE) - #define 
SIMDE_X86_AES_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_X86_SVML_NATIVE) - #define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) - #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES - #endif - #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_ARM_SVE_NATIVE) - #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_MIPS_MSA_NATIVE) - #define SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES - #endif - - #if !defined(SIMDE_WASM_SIMD128_NATIVE) - #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES - #endif -#endif - -/* Are floating point values stored using IEEE 754? Knowing - * this at during preprocessing is a bit tricky, mostly because what - * we're curious about is how values are stored and not whether the - * implementation is fully conformant in terms of rounding, NaN - * handling, etc. - * - * For example, if you use -ffast-math or -Ofast on - * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 - * support is not advertised (by defining __STDC_IEC_559__). - * - * However, what we care about is whether it is safe to assume that - * floating point values are stored in IEEE 754 format, in which case - * we can provide faster implementations of some functions. - * - * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- - * so we just assume IEEE 754 for now. There is a test which verifies - * this, if that test fails sowewhere please let us know and we'll add - * an exception for that platform. Meanwhile, you can define - * SIMDE_NO_IEEE754_STORAGE. */ -#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) - #define SIMDE_IEEE754_STORAGE -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_FP16) - #define SIMDE_ARM_NEON_FP16 -#endif - -#if defined(SIMDE_ARCH_ARM_NEON_BF16) - #define SIMDE_ARM_NEON_BF16 -#endif - -#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LASX) - #define SIMDE_LOONGARCH_LASX_NATIVE - #endif -#endif - -#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_LOONGARCH_LSX) - #define SIMDE_LOONGARCH_LSX_NATIVE - #endif -#endif - -#if defined(SIMDE_LOONGARCH_LASX_NATIVE) - #include -#endif -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - #include -#endif - -#endif /* !defined(SIMDE_FEATURES_H) */ -/* :: End simde/simde-features.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-math.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -/* Attempt to find math functions. Functions may be in , - * , compiler built-ins/intrinsics, or platform/architecture - * specific headers. In some cases, especially those not built in to - * libm, we may need to define our own implementations. */ - -#if !defined(SIMDE_MATH_H) -#define SIMDE_MATH_H 1 - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#include -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -/* SLEEF support - * https://sleef.org/ - * - * If you include prior to including SIMDe, SIMDe will use - * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to - * including SIMDe to force the issue. - * - * Note that SLEEF does requires linking to libsleef. - * - * By default, SIMDe will use the 1 ULP functions, but if you use - * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is - * only the case for the simde_math_* functions; for code in other - * SIMDe headers which calls SLEEF directly we may use functions with - * greater error if the API we're implementing is less precise (for - * example, SVML guarantees 4 ULP, so we will generally use the 3.5 - * ULP functions from SLEEF). */ -#if !defined(SIMDE_MATH_SLEEF_DISABLE) - #if defined(__SLEEF_H__) - #define SIMDE_MATH_SLEEF_ENABLE - #endif -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ - #include - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) - #if defined(SLEEF_VERSION_MAJOR) - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #endif -#else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(__has_builtin) - #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) - #define SIMDE_MATH_BUILTIN_LIBM(func) (1) -#else - #define SIMDE_MATH_BUILTIN_LIBM(func) (0) -#endif - -#if defined(HUGE_VAL) - /* Looks like or has already been included. */ - - /* The math.h from libc++ (yes, the C header from the C++ standard - * library) will define an isnan function, but not an isnan macro - * like the C standard requires. So we detect the header guards - * macro libc++ uses. 
*/ - #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) - #define SIMDE_MATH_HAVE_MATH_H - #elif defined(__cplusplus) - #define SIMDE_MATH_HAVE_CMATH - #endif -#elif defined(__has_include) - #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() - #define SIMDE_MATH_HAVE_CMATH - #include - #elif __has_include() - #define SIMDE_MATH_HAVE_MATH_H - #include - #elif !defined(SIMDE_MATH_NO_LIBM) - #define SIMDE_MATH_NO_LIBM - #endif -#elif !defined(SIMDE_MATH_NO_LIBM) - #if defined(__cplusplus) && (__cplusplus >= 201103L) - #define SIMDE_MATH_HAVE_CMATH - HEDLEY_DIAGNOSTIC_PUSH - #if defined(HEDLEY_MSVC_VERSION) - /* VS 14 emits this diagnostic about noexcept being used on a - * function, which we can't do anything about. */ - #pragma warning(disable:4996) - #endif - #include - HEDLEY_DIAGNOSTIC_POP - #else - #define SIMDE_MATH_HAVE_MATH_H - #include - #endif -#endif - -#if !defined(SIMDE_MATH_INFINITY) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_INFINITY (__builtin_inf()) - #elif defined(INFINITY) - #define SIMDE_MATH_INFINITY INFINITY - #endif -#endif - -#if !defined(SIMDE_INFINITYF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inff) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_INFINITYF (__builtin_inff()) - #elif defined(INFINITYF) - #define SIMDE_MATH_INFINITYF INFINITYF - #elif defined(SIMDE_MATH_INFINITY) - #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) - #endif -#endif - -#if !defined(SIMDE_MATH_NAN) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nan) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_NAN (__builtin_nan("")) - #elif defined(NAN) - #define SIMDE_MATH_NAN NAN - #endif -#endif - -#if !defined(SIMDE_NANF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_NANF (__builtin_nanf("")) - #elif defined(NANF) - #define SIMDE_MATH_NANF NANF - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) - #endif -#endif - -#if !defined(SIMDE_MATH_PI) - #if defined(M_PI) - #define SIMDE_MATH_PI M_PI - #else - #define SIMDE_MATH_PI 3.14159265358979323846 - #endif -#endif - -#if !defined(SIMDE_MATH_PIF) - #if defined(M_PI) - #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) - #else - #define SIMDE_MATH_PIF 3.14159265358979323846f - #endif -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180) - #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180F) - #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f -#endif - -#if !defined(SIMDE_MATH_180_OVER_PI) - #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 -#endif - -#if !defined(SIMDE_MATH_180_OVER_PIF) - #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f -#endif - -#if 
!defined(SIMDE_MATH_FLT_MIN) - #if defined(__FLT_MIN__) - #define SIMDE_MATH_FLT_MIN __FLT_MIN__ - #else - #if !defined(FLT_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MIN FLT_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_FLT_MAX) - #if defined(__FLT_MAX__) - #define SIMDE_MATH_FLT_MAX __FLT_MAX__ - #else - #if !defined(FLT_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MAX FLT_MAX - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MIN) - #if defined(__DBL_MIN__) - #define SIMDE_MATH_DBL_MIN __DBL_MIN__ - #else - #if !defined(DBL_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MIN DBL_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MAX) - #if defined(__DBL_MAX__) - #define SIMDE_MATH_DBL_MAX __DBL_MAX__ - #else - #if !defined(DBL_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MAX DBL_MAX - #endif -#endif - -/*** Classification macros from C99 ***/ - -#if !defined(simde_math_isinf) - #if SIMDE_MATH_BUILTIN_LIBM(isinf) - #define simde_math_isinf(v) __builtin_isinf(v) - #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isinf(v) isinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinf(v) std::isinf(v) - #endif -#endif - -#if !defined(simde_math_isinff) - #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define simde_math_isinff(v) __builtin_isinff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinff(v) std::isinf(v) - #elif defined(simde_math_isinf) - #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnan) - #if SIMDE_MATH_BUILTIN_LIBM(isnan) - #define simde_math_isnan(v) __builtin_isnan(v) - #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnan(v) isnan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnan(v) std::isnan(v) - #endif -#endif - -#if !defined(simde_math_isnanf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ - #define simde_math_isnanf(v) __builtin_isnanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnanf(v) std::isnan(v) - #elif defined(simde_math_isnan) - #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnormal) - #if SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormal(v) __builtin_isnormal(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormal(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormal(v) std::isnormal(v) - #endif -#endif - -#if !defined(simde_math_isnormalf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) - #define simde_math_isnormalf(v) __builtin_isnormalf(v) - #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormalf(v) __builtin_isnormal(v) - #elif defined(isnormalf) - #define simde_math_isnormalf(v) isnormalf(v) - #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormalf(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormalf(v) std::isnormal(v) - #elif defined(simde_math_isnormal) - #define simde_math_isnormalf(v) 
simde_math_isnormal(v) - #endif -#endif - -#if !defined(simde_math_issubnormalf) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) - #endif -#endif - -#if !defined(simde_math_issubnormal) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormal(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) - #endif -#endif - -#if defined(FP_NAN) - #define SIMDE_MATH_FP_NAN FP_NAN -#else - #define SIMDE_MATH_FP_NAN 0 -#endif -#if defined(FP_INFINITE) - #define SIMDE_MATH_FP_INFINITE FP_INFINITE -#else - #define SIMDE_MATH_FP_INFINITE 1 -#endif -#if defined(FP_ZERO) - #define SIMDE_MATH_FP_ZERO FP_ZERO -#else - #define SIMDE_MATH_FP_ZERO 2 -#endif -#if defined(FP_SUBNORMAL) - #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL -#else - #define SIMDE_MATH_FP_SUBNORMAL 3 -#endif -#if defined(FP_NORMAL) - #define SIMDE_MATH_FP_NORMAL FP_NORMAL -#else - #define SIMDE_MATH_FP_NORMAL 4 -#endif - -static HEDLEY_INLINE -int -simde_math_fpclassifyf(float v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0f) ? SIMDE_MATH_FP_ZERO : - simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -static HEDLEY_INLINE -int -simde_math_fpclassify(double v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0) ? SIMDE_MATH_FP_ZERO : - simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -#define SIMDE_MATH_FP_QNAN 0x01 -#define SIMDE_MATH_FP_PZERO 0x02 -#define SIMDE_MATH_FP_NZERO 0x04 -#define SIMDE_MATH_FP_PINF 0x08 -#define SIMDE_MATH_FP_NINF 0x10 -#define SIMDE_MATH_FP_DENORMAL 0x20 -#define SIMDE_MATH_FP_NEGATIVE 0x40 -#define SIMDE_MATH_FP_SNAN 0x80 - -static HEDLEY_INLINE -uint8_t -simde_math_fpclassf(float v, const int imm8) { - union { - float f; - uint32_t u; - } fu; - fu.f = v; - uint32_t bits = fu.u; - uint8_t NegNum = (bits >> 31) & 1; - uint32_t const ExpMask = 0x3F800000; // [30:23] - uint32_t const MantMask = 0x007FFFFF; // [22:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 22) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -static HEDLEY_INLINE -uint8_t -simde_math_fpclass(double v, const int imm8) { - union { - double d; - uint64_t u; - } du; - du.d = v; - uint64_t bits = du.u; - uint8_t NegNum = (bits >> 63) & 1; - uint64_t const ExpMask = 0x3FF0000000000000; // [62:52] - uint64_t const MantMask = 0x000FFFFFFFFFFFFF; // [51:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 51) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -/*** Manipulation functions ***/ - -#if !defined(simde_math_nextafter) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafter(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafter(x, y) 
nextafter(x, y) - #endif -#endif - -#if !defined(simde_math_nextafterf) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafterf(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafterf(x, y) nextafterf(x, y) - #endif -#endif - -/*** Functions from C99 ***/ - -#if !defined(simde_math_abs) - #if SIMDE_MATH_BUILTIN_LIBM(abs) - #define simde_math_abs(v) __builtin_abs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_abs(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_abs(v) abs(v) - #endif -#endif - -#if !defined(simde_math_labs) - #if SIMDE_MATH_BUILTIN_LIBM(labs) - #define simde_math_labs(v) __builtin_labs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_labs(v) std::labs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_labs(v) labs(v) - #endif -#endif - -#if !defined(simde_math_llabs) - #if SIMDE_MATH_BUILTIN_LIBM(llabs) - #define simde_math_llabs(v) __builtin_llabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_llabs(v) std::llabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_llabs(v) llabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_acos) - #if SIMDE_MATH_BUILTIN_LIBM(acos) - #define simde_math_acos(v) __builtin_acos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acos(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acos(v) acos(v) - #endif -#endif - -#if !defined(simde_math_acosf) - #if SIMDE_MATH_BUILTIN_LIBM(acosf) - #define simde_math_acosf(v) __builtin_acosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosf(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosf(v) acosf(v) - #endif -#endif - -#if !defined(simde_math_acosh) - #if SIMDE_MATH_BUILTIN_LIBM(acosh) - #define simde_math_acosh(v) __builtin_acosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosh(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosh(v) acosh(v) - #endif -#endif - -#if !defined(simde_math_acoshf) - #if SIMDE_MATH_BUILTIN_LIBM(acoshf) - #define simde_math_acoshf(v) __builtin_acoshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acoshf(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acoshf(v) acoshf(v) - #endif -#endif - -#if !defined(simde_math_asin) - #if SIMDE_MATH_BUILTIN_LIBM(asin) - #define simde_math_asin(v) __builtin_asin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asin(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asin(v) asin(v) - #endif -#endif - -#if !defined(simde_math_asinf) - #if SIMDE_MATH_BUILTIN_LIBM(asinf) - #define simde_math_asinf(v) __builtin_asinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinf(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinf(v) asinf(v) - #endif -#endif - -#if 
!defined(simde_math_asinh) - #if SIMDE_MATH_BUILTIN_LIBM(asinh) - #define simde_math_asinh(v) __builtin_asinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinh(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinh(v) asinh(v) - #endif -#endif - -#if !defined(simde_math_asinhf) - #if SIMDE_MATH_BUILTIN_LIBM(asinhf) - #define simde_math_asinhf(v) __builtin_asinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinhf(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinhf(v) asinhf(v) - #endif -#endif - -#if !defined(simde_math_atan) - #if SIMDE_MATH_BUILTIN_LIBM(atan) - #define simde_math_atan(v) __builtin_atan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan(v) atan(v) - #endif -#endif - -#if !defined(simde_math_atan2) - #if SIMDE_MATH_BUILTIN_LIBM(atan2) - #define simde_math_atan2(y, x) __builtin_atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2(y, x) atan2(y, x) - #endif -#endif - -#if !defined(simde_math_atan2f) - #if SIMDE_MATH_BUILTIN_LIBM(atan2f) - #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2f(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2f(y, x) atan2f(y, x) - #endif -#endif - -#if !defined(simde_math_atanf) - #if SIMDE_MATH_BUILTIN_LIBM(atanf) - #define simde_math_atanf(v) __builtin_atanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanf(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanf(v) atanf(v) - #endif -#endif - -#if !defined(simde_math_atanh) - #if SIMDE_MATH_BUILTIN_LIBM(atanh) - #define simde_math_atanh(v) __builtin_atanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanh(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanh(v) atanh(v) - #endif -#endif - -#if !defined(simde_math_atanhf) - #if SIMDE_MATH_BUILTIN_LIBM(atanhf) - #define simde_math_atanhf(v) __builtin_atanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanhf(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanhf(v) atanhf(v) - #endif -#endif - -#if !defined(simde_math_cbrt) - #if SIMDE_MATH_BUILTIN_LIBM(cbrt) - #define simde_math_cbrt(v) __builtin_cbrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrt(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrt(v) cbrt(v) - #endif -#endif - -#if !defined(simde_math_cbrtf) - #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) - #define simde_math_cbrtf(v) __builtin_cbrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrtf(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrtf(v) cbrtf(v) - #endif -#endif - -#if !defined(simde_math_ceil) - #if SIMDE_MATH_BUILTIN_LIBM(ceil) - #define simde_math_ceil(v) __builtin_ceil(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceil(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_ceil(v) ceil(v) - #endif -#endif - -#if !defined(simde_math_ceilf) - #if SIMDE_MATH_BUILTIN_LIBM(ceilf) - #define simde_math_ceilf(v) __builtin_ceilf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceilf(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) 
- #define simde_math_ceilf(v) ceilf(v) - #endif -#endif - -#if !defined(simde_math_copysign) - #if SIMDE_MATH_BUILTIN_LIBM(copysign) - #define simde_math_copysign(x, y) __builtin_copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysign(x, y) std::copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysign(x, y) copysign(x, y) - #endif -#endif - -#if !defined(simde_math_copysignf) - #if SIMDE_MATH_BUILTIN_LIBM(copysignf) - #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysignf(x, y) std::copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysignf(x, y) copysignf(x, y) - #endif -#endif - -#if !defined(simde_math_signbit) - #if SIMDE_MATH_BUILTIN_LIBM(signbit) - #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - #define simde_math_signbit(x) __builtin_signbit(x) - #else - #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) - #endif - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_signbit(x) std::signbit(x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_signbit(x) signbit(x) - #endif -#endif - -#if !defined(simde_math_cos) - #if SIMDE_MATH_BUILTIN_LIBM(cos) - #define simde_math_cos(v) __builtin_cos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cos(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cos(v) cos(v) - #endif -#endif - -#if !defined(simde_math_cosf) - #if defined(SIMDE_MATH_SLEEF_ENABLE) - #if SIMDE_ACCURACY_PREFERENCE < 1 - #define simde_math_cosf(v) Sleef_cosf_u35(v) - #else - #define simde_math_cosf(v) Sleef_cosf_u10(v) - #endif - #elif SIMDE_MATH_BUILTIN_LIBM(cosf) - #define simde_math_cosf(v) __builtin_cosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosf(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosf(v) cosf(v) - #endif -#endif - -#if !defined(simde_math_cosh) - #if SIMDE_MATH_BUILTIN_LIBM(cosh) - #define simde_math_cosh(v) __builtin_cosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosh(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosh(v) cosh(v) - #endif -#endif - -#if !defined(simde_math_coshf) - #if SIMDE_MATH_BUILTIN_LIBM(coshf) - #define simde_math_coshf(v) __builtin_coshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_coshf(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_coshf(v) coshf(v) - #endif -#endif - -#if !defined(simde_math_erf) - #if SIMDE_MATH_BUILTIN_LIBM(erf) - #define simde_math_erf(v) __builtin_erf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erf(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erf(v) erf(v) - #endif -#endif - -#if !defined(simde_math_erff) - #if SIMDE_MATH_BUILTIN_LIBM(erff) - #define simde_math_erff(v) __builtin_erff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erff(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erff(v) erff(v) - #endif -#endif - -#if !defined(simde_math_erfc) - #if SIMDE_MATH_BUILTIN_LIBM(erfc) - #define simde_math_erfc(v) __builtin_erfc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfc(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfc(v) erfc(v) - #endif -#endif - -#if !defined(simde_math_erfcf) - #if SIMDE_MATH_BUILTIN_LIBM(erfcf) - #define simde_math_erfcf(v) 
__builtin_erfcf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfcf(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfcf(v) erfcf(v) - #endif -#endif - -#if !defined(simde_math_exp) - #if SIMDE_MATH_BUILTIN_LIBM(exp) - #define simde_math_exp(v) __builtin_exp(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp(v) exp(v) - #endif -#endif - -#if !defined(simde_math_expf) - #if SIMDE_MATH_BUILTIN_LIBM(expf) - #define simde_math_expf(v) __builtin_expf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expf(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expf(v) expf(v) - #endif -#endif - -#if !defined(simde_math_expm1) - #if SIMDE_MATH_BUILTIN_LIBM(expm1) - #define simde_math_expm1(v) __builtin_expm1(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1(v) expm1(v) - #endif -#endif - -#if !defined(simde_math_expm1f) - #if SIMDE_MATH_BUILTIN_LIBM(expm1f) - #define simde_math_expm1f(v) __builtin_expm1f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1f(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1f(v) expm1f(v) - #endif -#endif - -#if !defined(simde_math_exp2) - #if SIMDE_MATH_BUILTIN_LIBM(exp2) - #define simde_math_exp2(v) __builtin_exp2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2(v) exp2(v) - #endif -#endif - -#if !defined(simde_math_exp2f) - #if SIMDE_MATH_BUILTIN_LIBM(exp2f) - #define simde_math_exp2f(v) __builtin_exp2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2f(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2f(v) exp2f(v) - #endif -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10(v) __builtin_exp10(v) -#else -# define simde_math_exp10(v) pow(10.0, (v)) -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10f(v) __builtin_exp10f(v) -#else -# define simde_math_exp10f(v) powf(10.0f, (v)) -#endif - -#if !defined(simde_math_fabs) - #if SIMDE_MATH_BUILTIN_LIBM(fabs) - #define simde_math_fabs(v) __builtin_fabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabs(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabs(v) fabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_floor) - #if SIMDE_MATH_BUILTIN_LIBM(floor) - #define simde_math_floor(v) __builtin_floor(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floor(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floor(v) floor(v) - #endif -#endif - -#if !defined(simde_math_floorf) - #if SIMDE_MATH_BUILTIN_LIBM(floorf) - #define simde_math_floorf(v) __builtin_floorf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floorf(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floorf(v) floorf(v) - #endif -#endif - -#if 
!defined(simde_math_fma) - #if SIMDE_MATH_BUILTIN_LIBM(fma) - #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fma(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fma(x, y, z) fma(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmaf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaf) - #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaf(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaf(x, y, z) fmaf(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmax) - #if SIMDE_MATH_BUILTIN_LIBM(fmax) - #define simde_math_fmax(x, y) __builtin_fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmax(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmax(x, y) fmax(x, y) - #endif -#endif - -#if !defined(simde_math_fmaxf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) - #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaxf(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaxf(x, y) fmaxf(x, y) - #endif -#endif - -#if !defined(simde_math_hypot) - #if SIMDE_MATH_BUILTIN_LIBM(hypot) - #define simde_math_hypot(y, x) __builtin_hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypot(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypot(y, x) hypot(y, x) - #endif -#endif - -#if !defined(simde_math_hypotf) - #if SIMDE_MATH_BUILTIN_LIBM(hypotf) - #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypotf(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypotf(y, x) hypotf(y, x) - #endif -#endif - -#if !defined(simde_math_log) - #if SIMDE_MATH_BUILTIN_LIBM(log) - #define simde_math_log(v) __builtin_log(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log(v) log(v) - #endif -#endif - -#if !defined(simde_math_logf) - #if SIMDE_MATH_BUILTIN_LIBM(logf) - #define simde_math_logf(v) __builtin_logf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logf(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logf(v) logf(v) - #endif -#endif - -#if !defined(simde_math_logb) - #if SIMDE_MATH_BUILTIN_LIBM(logb) - #define simde_math_logb(v) __builtin_logb(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logb(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logb(v) logb(v) - #endif -#endif - -#if !defined(simde_math_logbf) - #if SIMDE_MATH_BUILTIN_LIBM(logbf) - #define simde_math_logbf(v) __builtin_logbf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logbf(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logbf(v) logbf(v) - #endif -#endif - -#if !defined(simde_math_log1p) - #if SIMDE_MATH_BUILTIN_LIBM(log1p) - #define simde_math_log1p(v) __builtin_log1p(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log1p(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1p(v) log1p(v) - #endif -#endif - -#if !defined(simde_math_log1pf) - #if SIMDE_MATH_BUILTIN_LIBM(log1pf) - #define simde_math_log1pf(v) __builtin_log1pf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define 
simde_math_log1pf(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1pf(v) log1pf(v) - #endif -#endif - -#if !defined(simde_math_log2) - #if SIMDE_MATH_BUILTIN_LIBM(log2) - #define simde_math_log2(v) __builtin_log2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2(v) log2(v) - #endif -#endif - -#if !defined(simde_math_log2f) - #if SIMDE_MATH_BUILTIN_LIBM(log2f) - #define simde_math_log2f(v) __builtin_log2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2f(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2f(v) log2f(v) - #endif -#endif - -#if !defined(simde_math_log10) - #if SIMDE_MATH_BUILTIN_LIBM(log10) - #define simde_math_log10(v) __builtin_log10(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10(v) log10(v) - #endif -#endif - -#if !defined(simde_math_log10f) - #if SIMDE_MATH_BUILTIN_LIBM(log10f) - #define simde_math_log10f(v) __builtin_log10f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10f(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10f(v) log10f(v) - #endif -#endif - -#if !defined(simde_math_modf) - #if SIMDE_MATH_BUILTIN_LIBM(modf) - #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modf(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modf(x, iptr) modf(x, iptr) - #endif -#endif - -#if !defined(simde_math_modff) - #if SIMDE_MATH_BUILTIN_LIBM(modff) - #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modff(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modff(x, iptr) modff(x, iptr) - #endif -#endif - -#if !defined(simde_math_nearbyint) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) - #define simde_math_nearbyint(v) __builtin_nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyint(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyint(v) nearbyint(v) - #endif -#endif - -#if !defined(simde_math_nearbyintf) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) - #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyintf(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyintf(v) nearbyintf(v) - #endif -#endif - -#if !defined(simde_math_pow) - #if SIMDE_MATH_BUILTIN_LIBM(pow) - #define simde_math_pow(y, x) __builtin_pow(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_pow(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_pow(y, x) pow(y, x) - #endif -#endif - -#if !defined(simde_math_powf) - #if SIMDE_MATH_BUILTIN_LIBM(powf) - #define simde_math_powf(y, x) __builtin_powf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_powf(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_powf(y, x) powf(y, x) - #endif -#endif - -#if !defined(simde_math_rint) - #if SIMDE_MATH_BUILTIN_LIBM(rint) - #define simde_math_rint(v) __builtin_rint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rint(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rint(v) rint(v) - #endif 
-#endif - -#if !defined(simde_math_rintf) - #if SIMDE_MATH_BUILTIN_LIBM(rintf) - #define simde_math_rintf(v) __builtin_rintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rintf(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rintf(v) rintf(v) - #endif -#endif - -#if !defined(simde_math_round) - #if SIMDE_MATH_BUILTIN_LIBM(round) - #define simde_math_round(v) __builtin_round(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_round(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_round(v) round(v) - #endif -#endif - -#if !defined(simde_math_roundf) - #if SIMDE_MATH_BUILTIN_LIBM(roundf) - #define simde_math_roundf(v) __builtin_roundf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_roundf(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_roundf(v) roundf(v) - #endif -#endif - -#if !defined(simde_math_roundeven) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundeven(v) __builtin_roundeven(v) - #elif defined(simde_math_round) && defined(simde_math_fabs) - static HEDLEY_INLINE - double - simde_math_roundeven(double v) { - double rounded = simde_math_round(v); - double diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundeven simde_math_roundeven - #endif -#endif - -#if !defined(simde_math_roundevenf) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundevenf(v) __builtin_roundevenf(v) - #elif defined(simde_math_roundf) && defined(simde_math_fabsf) - static HEDLEY_INLINE - float - simde_math_roundevenf(float v) { - float rounded = simde_math_roundf(v); - float diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundevenf simde_math_roundevenf - #endif -#endif - -#if !defined(simde_math_sin) - #if SIMDE_MATH_BUILTIN_LIBM(sin) - #define simde_math_sin(v) __builtin_sin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sin(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sin(v) sin(v) - #endif -#endif - -#if !defined(simde_math_sinf) - #if SIMDE_MATH_BUILTIN_LIBM(sinf) - #define simde_math_sinf(v) __builtin_sinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinf(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinf(v) sinf(v) - #endif -#endif - -#if !defined(simde_math_sinh) - #if SIMDE_MATH_BUILTIN_LIBM(sinh) - #define simde_math_sinh(v) __builtin_sinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinh(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinh(v) sinh(v) - #endif -#endif - -#if !defined(simde_math_sinhf) - #if SIMDE_MATH_BUILTIN_LIBM(sinhf) - #define simde_math_sinhf(v) __builtin_sinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinhf(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinhf(v) sinhf(v) - #endif -#endif - -#if !defined(simde_math_sqrt) - #if SIMDE_MATH_BUILTIN_LIBM(sqrt) - #define simde_math_sqrt(v) __builtin_sqrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrt(v) 
std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrt(v) sqrt(v) - #endif -#endif - -#if !defined(simde_math_sqrtf) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) - #define simde_math_sqrtf(v) __builtin_sqrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtf(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtf(v) sqrtf(v) - #endif -#endif - -#if !defined(simde_math_sqrtl) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtl) - #define simde_math_sqrtl(v) __builtin_sqrtl(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtl(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtl(v) sqrtl(v) - #endif -#endif - -#if !defined(simde_math_tan) - #if SIMDE_MATH_BUILTIN_LIBM(tan) - #define simde_math_tan(v) __builtin_tan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tan(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tan(v) tan(v) - #endif -#endif - -#if !defined(simde_math_tanf) - #if SIMDE_MATH_BUILTIN_LIBM(tanf) - #define simde_math_tanf(v) __builtin_tanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanf(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanf(v) tanf(v) - #endif -#endif - -#if !defined(simde_math_tanh) - #if SIMDE_MATH_BUILTIN_LIBM(tanh) - #define simde_math_tanh(v) __builtin_tanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanh(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanh(v) tanh(v) - #endif -#endif - -#if !defined(simde_math_tanhf) - #if SIMDE_MATH_BUILTIN_LIBM(tanhf) - #define simde_math_tanhf(v) __builtin_tanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanhf(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanhf(v) tanhf(v) - #endif -#endif - -#if !defined(simde_math_trunc) - #if SIMDE_MATH_BUILTIN_LIBM(trunc) - #define simde_math_trunc(v) __builtin_trunc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_trunc(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_trunc(v) trunc(v) - #endif -#endif - -#if !defined(simde_math_truncf) - #if SIMDE_MATH_BUILTIN_LIBM(truncf) - #define simde_math_truncf(v) __builtin_truncf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_truncf(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_truncf(v) truncf(v) - #endif -#endif - -/*** Comparison macros (which don't raise invalid errors) ***/ - -#if defined(isunordered) - #define simde_math_isunordered(x, y) isunordered(x, y) -#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) - #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) -#else - static HEDLEY_INLINE - int simde_math_isunordered(double x, double y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunordered simde_math_isunordered - - static HEDLEY_INLINE - int simde_math_isunorderedf(float x, float y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunorderedf simde_math_isunorderedf -#endif -#if !defined(simde_math_isunorderedf) - #define simde_math_isunorderedf simde_math_isunordered -#endif - -/*** Additional functions not in libm ***/ - -#if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) - static HEDLEY_INLINE - double - simde_math_cdfnorm(double x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const double a1 = 0.254829592; - static const double a2 = -0.284496736; 
- static const double a3 = 1.421413741; - static const double a4 = -1.453152027; - static const double a5 = 1.061405429; - static const double p = 0.3275911; - - const int sign = x < 0; - x = simde_math_fabs(x) / simde_math_sqrt(2.0); - - /* A&S formula 7.1.26 */ - double t = 1.0 / (1.0 + p * x); - double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); - - return 0.5 * (1.0 + (sign ? -y : y)); - } - #define simde_math_cdfnorm simde_math_cdfnorm -#endif - -#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) - static HEDLEY_INLINE - float - simde_math_cdfnormf(float x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const float a1 = 0.254829592f; - static const float a2 = -0.284496736f; - static const float a3 = 1.421413741f; - static const float a4 = -1.453152027f; - static const float a5 = 1.061405429f; - static const float p = 0.3275911f; - - const int sign = x < 0; - x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); - - /* A&S formula 7.1.26 */ - float t = 1.0f / (1.0f + p * x); - float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); - - return 0.5f * (1.0f + (sign ? -y : y)); - } - #define simde_math_cdfnormf simde_math_cdfnormf -#endif - -#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) - /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ - static HEDLEY_INLINE - double - simde_math_cdfnorminv(double p) { - static const double a[6] = { - -3.969683028665376e+01, - 2.209460984245205e+02, - -2.759285104469687e+02, - 1.383577518672690e+02, - -3.066479806614716e+01, - 2.506628277459239e+00 - }; - - static const double b[5] = { - -5.447609879822406e+01, - 1.615858368580409e+02, - -1.556989798598866e+02, - 6.680131188771972e+01, - -1.328068155288572e+01 - }; - - static const double c[6] = { - -7.784894002430293e-03, - -3.223964580411365e-01, - -2.400758277161838e+00, - -2.549732539343734e+00, - 4.374664141464968e+00, - 2.938163982698783e+00 - }; - - static const double d[4] = { - 7.784695709041462e-03, - 3.224671290700398e-01, - 2.445134137142996e+00, - 3.754408661907416e+00 - }; - - static const double low = 0.02425; - static const double high = 0.97575; - double q, r; - - if (p < 0 || p > 1) { - return 0.0; - } else if (p == 0) { - return -SIMDE_MATH_INFINITY; - } else if (p == 1) { - return SIMDE_MATH_INFINITY; - } else if (p < low) { - q = simde_math_sqrt(-2.0 * simde_math_log(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } -} -#define simde_math_cdfnorminv simde_math_cdfnorminv -#endif - -#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_cdfnorminvf(float p) { - static const float a[6] = { - -3.969683028665376e+01f, - 2.209460984245205e+02f, - -2.759285104469687e+02f, - 1.383577518672690e+02f, - -3.066479806614716e+01f, - 
2.506628277459239e+00f - }; - static const float b[5] = { - -5.447609879822406e+01f, - 1.615858368580409e+02f, - -1.556989798598866e+02f, - 6.680131188771972e+01f, - -1.328068155288572e+01f - }; - static const float c[6] = { - -7.784894002430293e-03f, - -3.223964580411365e-01f, - -2.400758277161838e+00f, - -2.549732539343734e+00f, - 4.374664141464968e+00f, - 2.938163982698783e+00f - }; - static const float d[4] = { - 7.784695709041462e-03f, - 3.224671290700398e-01f, - 2.445134137142996e+00f, - 3.754408661907416e+00f - }; - static const float low = 0.02425f; - static const float high = 0.97575f; - float q, r; - - if (p < 0 || p > 1) { - return 0.0f; - } else if (p == 0) { - return -SIMDE_MATH_INFINITYF; - } else if (p == 1) { - return SIMDE_MATH_INFINITYF; - } else if (p < low) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5f; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } - } - #define simde_math_cdfnorminvf simde_math_cdfnorminvf -#endif - -#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfinv(double x) { - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c - * - * The original answer on SO uses a constant of 0.147, but in my - * testing 0.14829094707965850830078125 gives a lower average absolute error - * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). - * That said, if your goal is to minimize the *maximum* absolute - * error, 0.15449436008930206298828125 provides significantly better - * results; 0.0009250640869140625000000000 vs ~ 0.005. 
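The constant discussion above belongs to a Winitzki-style inverse-error-function approximation; a minimal standalone restatement of that formula in C++, using the average-error constant quoted above and std::erf only as a round-trip check (the helper name erfinv_approx is illustrative, not part of the removed header):

// Sketch: erfinv(x) ~= sgn(x) * sqrt(-t1 + sqrt(t1*t1 - t2)),
// with t1 = 2/(pi*a) + ln(1 - x^2)/2 and t2 = ln(1 - x^2)/a.
#include <cmath>
#include <cstdio>

static double erfinv_approx(double x) {
  const double a   = 0.14829094707965850830078125;  // average-abs-error constant from the note above
  const double pi  = 3.14159265358979323846;
  const double sgn = std::copysign(1.0, x);
  const double ln  = std::log((1.0 - x) * (1.0 + x));  // ln(1 - x^2)
  const double t1  = 2.0 / (pi * a) + 0.5 * ln;
  const double t2  = ln / a;
  return sgn * std::sqrt(-t1 + std::sqrt(t1 * t1 - t2));
}

int main() {
  // Round trip: erfinv(erf(0.5)) should land near 0.5 (within a few 1e-3 at worst).
  std::printf("%.5f\n", erfinv_approx(std::erf(0.5)));
  return 0;
}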
*/ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); - } - #define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfinvf(float x) { - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); - } - #define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfcinv(double x) { - if(x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - static const double p[6] = { - 0.1550470003116, - 1.382719649631, - 0.690969348887, - -1.128081391617, - 0.680544246825, - -0.16444156791 - }; - static const double q[3] = { - 0.155024849822, - 1.385228141995, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - static const double p[4] = { - 0.00980456202915, - 0.363667889171, - 0.97302949837, - -0.5374947401 - }; - static const double q[3] = { - 0.00980451277802, - 0.363699971544, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } - } - - #define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfcinvf(float x) { - if(x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = { - 0.1550470003116f, - 1.382719649631f, - 0.690969348887f, - -1.128081391617f, - 0.680544246825f - -0.164441567910f - }; - static const float q[3] = { - 0.155024849822f, - 1.385228141995f, - 1.000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = { - 0.00980456202915f, - 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f - }; - static const float q[3] = { - 0.00980451277802f, - 0.36369997154400f, - 1.00000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; - } - } - - #define simde_math_erfcinvf simde_math_erfcinvf -#endif - -static HEDLEY_INLINE -double -simde_math_rad2deg(double radians) { - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE -float -simde_math_rad2degf(float radians) { - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE -double -simde_math_deg2rad(double degrees) { - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE -float -simde_math_deg2radf(float degrees) { - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE -int8_t -simde_math_adds_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); - #else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_adds_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_adds_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_adds_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_adds_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); - #else - uint8_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_adds_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); - #else - uint16_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_adds_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); - #else - uint32_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_adds_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); - #else - uint64_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -int8_t -simde_math_subs_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); - #else - uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
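As an aside on the branchless saturation idiom used in the scalar fallbacks above (r |= -(r < a) for unsigned adds, res &= -(res <= a) for unsigned subtracts), a small self-contained C++ sketch; the names sat_add_u8 / sat_sub_u8 are illustrative only:

#include <cassert>
#include <cstdint>

// Unsigned saturating add: on wrap-around (r < a), -(r < a) is 0xFF, forcing r to the max.
static uint8_t sat_add_u8(uint8_t a, uint8_t b) {
  uint8_t r = static_cast<uint8_t>(a + b);
  r |= static_cast<uint8_t>(-(r < a));
  return r;
}

// Unsigned saturating subtract: on underflow (r > a), -(r <= a) is 0x00, clamping r to 0.
static uint8_t sat_sub_u8(uint8_t a, uint8_t b) {
  uint8_t r = static_cast<uint8_t>(a - b);
  r &= static_cast<uint8_t>(-(r <= a));
  return r;
}

int main() {
  assert(sat_add_u8(200, 100) == 255);  // would wrap to 44 without saturation
  assert(sat_sub_u8(10, 20)   == 0);    // would wrap to 246 without saturation
  assert(sat_add_u8(3, 4)     == 7);
  assert(sat_sub_u8(20, 5)    == 15);
  return 0;
}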
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - case 32: result = func_name(__VA_ARGS__, 32); break; \ - case 33: result = func_name(__VA_ARGS__, 33); break; \ - case 34: result = func_name(__VA_ARGS__, 34); break; \ - case 35: result = func_name(__VA_ARGS__, 35); break; \ - case 36: result = func_name(__VA_ARGS__, 36); break; \ - case 37: result = func_name(__VA_ARGS__, 37); break; \ - case 38: result = func_name(__VA_ARGS__, 38); break; \ - case 39: result = func_name(__VA_ARGS__, 39); break; \ - case 40: result = func_name(__VA_ARGS__, 40); break; \ - case 41: result = func_name(__VA_ARGS__, 41); break; \ - case 42: result = func_name(__VA_ARGS__, 42); break; \ - case 43: result = func_name(__VA_ARGS__, 43); break; \ - case 44: result = func_name(__VA_ARGS__, 44); break; \ - case 45: result = func_name(__VA_ARGS__, 45); break; \ - case 46: result = func_name(__VA_ARGS__, 46); break; \ - case 47: result = func_name(__VA_ARGS__, 47); break; \ - case 48: result = func_name(__VA_ARGS__, 48); break; \ - case 49: result = func_name(__VA_ARGS__, 49); break; \ - case 50: result = func_name(__VA_ARGS__, 50); break; \ - case 51: result = func_name(__VA_ARGS__, 51); break; \ - case 52: result = func_name(__VA_ARGS__, 52); break; \ - case 53: result = func_name(__VA_ARGS__, 53); break; \ - case 54: result = func_name(__VA_ARGS__, 54); break; \ - case 55: result = func_name(__VA_ARGS__, 55); break; \ - case 56: result = func_name(__VA_ARGS__, 56); break; \ - case 57: result = func_name(__VA_ARGS__, 57); break; \ - case 58: result = func_name(__VA_ARGS__, 58); break; \ - case 59: result = func_name(__VA_ARGS__, 59); break; \ - case 60: result = func_name(__VA_ARGS__, 60); break; \ - case 61: result = func_name(__VA_ARGS__, 61); break; \ - case 62: 
result = func_name(__VA_ARGS__, 62); break; \ - case 63: result = func_name(__VA_ARGS__, 63); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - case 32: func_name(__VA_ARGS__, 32); break; \ - case 33: func_name(__VA_ARGS__, 33); break; \ - case 34: func_name(__VA_ARGS__, 34); break; \ - case 35: func_name(__VA_ARGS__, 35); break; \ - case 36: func_name(__VA_ARGS__, 36); break; \ - case 37: func_name(__VA_ARGS__, 37); break; \ - case 38: func_name(__VA_ARGS__, 38); break; \ 
- case 39: func_name(__VA_ARGS__, 39); break; \ - case 40: func_name(__VA_ARGS__, 40); break; \ - case 41: func_name(__VA_ARGS__, 41); break; \ - case 42: func_name(__VA_ARGS__, 42); break; \ - case 43: func_name(__VA_ARGS__, 43); break; \ - case 44: func_name(__VA_ARGS__, 44); break; \ - case 45: func_name(__VA_ARGS__, 45); break; \ - case 46: func_name(__VA_ARGS__, 46); break; \ - case 47: func_name(__VA_ARGS__, 47); break; \ - case 48: func_name(__VA_ARGS__, 48); break; \ - case 49: func_name(__VA_ARGS__, 49); break; \ - case 50: func_name(__VA_ARGS__, 50); break; \ - case 51: func_name(__VA_ARGS__, 51); break; \ - case 52: func_name(__VA_ARGS__, 52); break; \ - case 53: func_name(__VA_ARGS__, 53); break; \ - case 54: func_name(__VA_ARGS__, 54); break; \ - case 55: func_name(__VA_ARGS__, 55); break; \ - case 56: func_name(__VA_ARGS__, 56); break; \ - case 57: func_name(__VA_ARGS__, 57); break; \ - case 58: func_name(__VA_ARGS__, 58); break; \ - case 59: func_name(__VA_ARGS__, 59); break; \ - case 60: func_name(__VA_ARGS__, 60); break; \ - case 61: func_name(__VA_ARGS__, 61); break; \ - case 62: func_name(__VA_ARGS__, 62); break; \ - case 63: func_name(__VA_ARGS__, 63); break; \ - default: default_case; break; \ - } \ - } while (0) - -HEDLEY_DIAGNOSTIC_POP - -#endif -/* :: End simde/simde-constify.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-align.h :: */ -/* Alignment - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - ********************************************************************** - * - * This is portability layer which should help iron out some - * differences across various compilers, as well as various verisons of - * C and C++. - * - * It was originally developed for SIMD Everywhere - * (), but since its only - * dependency is Hedley (, also CC0) - * it can easily be used in other projects, so please feel free to do - * so. - * - * If you do use this in your project, please keep a link to SIMDe in - * your code to remind you where to report any bugs and/or check for - * updated versions. - * - * # API Overview - * - * The API has several parts, and most macros have a few variations. - * There are APIs for declaring aligned fields/variables, optimization - * hints, and run-time alignment checks. - * - * Briefly, macros ending with "_TO" take numeric values and are great - * when you know the value you would like to use. Macros ending with - * "_LIKE", on the other hand, accept a type and are used when you want - * to use the alignment of a type instead of hardcoding a value. - * - * Documentation for each section of the API is inline. - * - * True to form, MSVC is the main problem and imposes several - * limitations on the effectiveness of the APIs. Detailed descriptions - * of the limitations of each macro are inline, but in general: - * - * * On C11+ or C++11+ code written using this API will work. The - * ASSUME macros may or may not generate a hint to the compiler, but - * that is only an optimization issue and will not actually cause - * failures. - * * If you're using pretty much any compiler other than MSVC, - * everything should basically work as well as in C11/C++11. 
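The constify macros above exist so a run-time value can reach a function that formally requires an integer constant expression: the do/while-wrapped switch enumerates every legal immediate, so the compiler sees a literal in each case. A minimal sketch of the same idea, assuming a hypothetical shift_by<N>() standing in for an intrinsic with an immediate operand (the real macros append the constant as a trailing argument rather than a template parameter):

#include <cstdint>
#include <cstdio>

template <int N>                       // N must be a compile-time constant,
static uint32_t shift_by(uint32_t v) { // like an intrinsic's immediate operand
  return v << N;
}

#define CONSTIFY_4(func, result, default_case, imm, ...)  \
  do {                                                    \
    switch (imm) {                                        \
      case 0: result = func<0>(__VA_ARGS__); break;       \
      case 1: result = func<1>(__VA_ARGS__); break;       \
      case 2: result = func<2>(__VA_ARGS__); break;       \
      case 3: result = func<3>(__VA_ARGS__); break;       \
      default: result = default_case; break;              \
    }                                                     \
  } while (0)

int main() {
  uint32_t r = 0;
  int imm = 3;                          // runtime value, not an ICE
  CONSTIFY_4(shift_by, r, 0u, imm, 5u); // dispatches to shift_by<3>(5u)
  std::printf("%u\n", r);               // prints 40
  return 0;
}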
- */ - -#if !defined(SIMDE_ALIGN_H) -#define SIMDE_ALIGN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* I know this seems a little silly, but some non-hosted compilers - * don't have stddef.h, so we try to accomodate them. */ -#if !defined(SIMDE_ALIGN_SIZE_T_) - #if defined(__SIZE_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__SIZE_T_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #else - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #endif -#endif - -#if !defined(SIMDE_ALIGN_INTPTR_T_) - #if defined(__INTPTR_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ - #elif defined(__PTRDIFF_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ - #elif defined(__PTRDIFF_T_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #else - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #endif -#endif - -#if defined(SIMDE_ALIGN_DEBUG) - #if defined(__cplusplus) - #include - #else - #include - #endif -#endif - -/* SIMDE_ALIGN_OF(Type) - * - * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or - * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. - * It isn't defined everywhere (only when the compiler has some alignof- - * like feature we can use to implement it), but it should work in most - * modern compilers, as well as C11 and C++11. - * - * If we can't find an implementation for SIMDE_ALIGN_OF then the macro - * will not be defined, so if you can handle that situation sensibly - * you may need to sprinkle some ifdefs into your code. - */ -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (0 && HEDLEY_HAS_FEATURE(c_alignof)) - #define SIMDE_ALIGN_OF(Type) _Alignof(Type) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) - #define SIMDE_ALIGN_OF(Type) alignof(Type) -#elif \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - defined(__IBM__ALIGNOF__) || \ - defined(__clang__) - #define SIMDE_ALIGN_OF(Type) __alignof__(Type) -#elif \ - HEDLEY_IAR_VERSION_CHECK(8,40,0) - #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(19,0,0) - /* Probably goes back much further, but MS takes down their old docs. - * If you can verify that this works in earlier versions please let - * me know! */ - #define SIMDE_ALIGN_OF(Type) __alignof(Type) -#endif - -/* SIMDE_ALIGN_MAXIMUM: - * - * This is the maximum alignment that the compiler supports. You can - * define the value prior to including SIMDe if necessary, but in that - * case *please* submit an issue so we can add the platform to the - * detection code. - * - * Most compilers are okay with types which are aligned beyond what - * they think is the maximum, as long as the alignment is a power - * of two. 
Older versions of MSVC is the exception, so we need to cap - * the alignment requests at values that the implementation supports. - * - * XL C/C++ will accept values larger than 16 (which is the alignment - * of an AltiVec vector), but will not reliably align to the larger - * value, so so we cap the value at 16 there. - * - * If the compiler accepts any power-of-two value within reason then - * this macro should be left undefined, and the SIMDE_ALIGN_CAP - * macro will just return the value passed to it. */ -#if !defined(SIMDE_ALIGN_MAXIMUM) - #if defined(HEDLEY_MSVC_VERSION) - #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) - // Visual studio 2017 and newer does not need a max - #else - #if defined(_M_IX86) || defined(_M_AMD64) - #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 - #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) - /* VS 2010 is really a guess based on Wikipedia; if anyone can - * test with old VS versions I'd really appreciate it. */ - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 - #else - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif - #elif defined(_M_ARM) || defined(_M_ARM64) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 - #endif - #endif - #elif defined(HEDLEY_IBM_VERSION) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif -#endif - -/* You can mostly ignore these; they're intended for internal use. - * If you do need to use them please let me know; if they fulfill - * a common use case I'll probably drop the trailing underscore - * and make them part of the public API. */ -#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) - #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 - #define SIMDE_ALIGN_64_ 32 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 - #define SIMDE_ALIGN_64_ 16 - #define SIMDE_ALIGN_32_ 16 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 - #define SIMDE_ALIGN_64_ 8 - #define SIMDE_ALIGN_32_ 8 - #define SIMDE_ALIGN_16_ 8 - #define SIMDE_ALIGN_8_ 8 - #else - #error Max alignment expected to be >= 8 - #endif -#else - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 -#endif - -/** - * SIMDE_ALIGN_CAP(Alignment) - * - * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. - */ -#if defined(SIMDE_ALIGN_MAXIMUM) - #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) -#else - #define SIMDE_ALIGN_CAP(Alignment) (Alignment) -#endif - -/* SIMDE_ALIGN_TO(Alignment) - * - * SIMDE_ALIGN_TO is used to declare types or variables. It basically - * maps to the align attribute in most compilers, the align declspec - * in MSVC, or _Alignas/alignas in C11/C++11. - * - * Example: - * - * struct i32x4 { - * SIMDE_ALIGN_TO(16) int32_t values[4]; - * } - * - * Limitations: - * - * MSVC requires that the Alignment parameter be numeric; you can't do - * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is - * unfortunate because that's really how the LIKE macros are - * implemented, and I am not aware of a way to get anything like this - * to work without using the C11/C++11 keywords. 
- * - * It also means that we can't use SIMDE_ALIGN_CAP to limit the - * alignment to the value specified, which MSVC also requires, so on - * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. - * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, - * but should be safe to use on MSVC. - * - * All this is to say that, if you want your code to work on MSVC, you - * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of - * SIMDE_ALIGN_TO(8/16/32/64). - */ -#if \ - HEDLEY_HAS_ATTRIBUTE(aligned) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) -#elif \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) - #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) - #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) - /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); - * the alignment passed to the declspec has to be an integer. */ - #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE -#endif -#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) -#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) -#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) -#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) - -/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) - * - * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's - * std::assume_aligned, or __builtin_assume_aligned. It tells the - * compiler to assume that the provided pointer is aligned to an - * `Alignment`-byte boundary. - * - * If you define SIMDE_ALIGN_DEBUG prior to including this header then - * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
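On the declaration side, the portable C++ spelling of SIMDE_ALIGN_TO(16) is just alignas(16), and the checked variant of the ASSUME_TO macro described above reduces to a modulo test on the pointer value; a brief sketch (struct and helper names are illustrative):

#include <cstddef>
#include <cstdint>
#include <cstdio>

struct i32x4 {
  alignas(16) int32_t values[4];   // C++11 spelling of SIMDE_ALIGN_TO(16)
};

// What the checked ASSUME_TO variant boils down to: warn when the pointer
// is not on the requested boundary.
static void check_alignment(const void* p, std::size_t alignment, const char* name) {
  if (reinterpret_cast<std::uintptr_t>(p) % alignment != 0)
    std::fprintf(stderr, "alignment check failed for `%s'\n", name);
}

int main() {
  i32x4 v = {{1, 2, 3, 4}};
  check_alignment(&v, 16, "v");     // silent: alignas(16) guarantees this
  std::printf("%d\n", v.values[0]);
  return 0;
}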
We don't - * integrate with NDEBUG in this header, but it may be a good idea to - * put something like this in your code: - * - * #if !defined(NDEBUG) - * #define SIMDE_ALIGN_DEBUG - * #endif - * #include <.../simde-align.h> - */ -#if \ - HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ - HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ - __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ - __assume_aligned(simde_assume_aligned_t_, Alignment); \ - simde_assume_aligned_t_; \ - })) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) -#else - #if defined(__cplusplus) - template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) - #else - HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) - #endif - { - HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); - return ptr; - } - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) - #else - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) - #endif -#endif - -#if !defined(SIMDE_ALIGN_DEBUG) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) -#else - #include - #if defined(__cplusplus) - template - static HEDLEY_ALWAYS_INLINE - T* - simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #else - static HEDLEY_ALWAYS_INLINE - void* - simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #endif - { - if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { - fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", - file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), - HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), - HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); - } - - return ptr; - } - - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) - #else - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) - #endif -#endif - -/* SIMDE_ALIGN_LIKE(Type) - * SIMDE_ALIGN_LIKE_#(Type) - * - * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros - * except instead of an integer they take a type; basically, it's just - * a more convenient way to do something like: - * - * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - * - * The versions with a numeric suffix will fall back 
on using a numeric - * value in the event we can't use SIMDE_ALIGN_OF(Type). This is - * mainly for MSVC, where __declspec(align()) can't handle anything - * other than hard-coded numeric values. - */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) - #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) -#else - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 -#endif - -/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) - * - * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a - * type instead of a numeric value. */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) - #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) -#endif - -/* SIMDE_ALIGN_CAST(Type, Pointer) - * - * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try - * to silence warnings that some compilers may produce if you try - * to assign to a type with increased alignment requirements. - * - * Note that it does *not* actually attempt to tell the compiler that - * the pointer is aligned like the destination should be; that's the - * job of the next macro. This macro is necessary for stupid APIs - * like _mm_loadu_si128 where the input is a __m128i* but the function - * is specifically for data which isn't necessarily aligned to - * _Alignof(__m128i). - */ -#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ - Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_r_; \ - })) -#else - #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) -#endif - -/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) - * - * This is sort of like a combination of a reinterpret_cast and a - * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell - * the compiler that the pointer is aligned like the specified type - * and casts the pointer to the specified type while suppressing any - * warnings from the compiler about casting to a type with greater - * alignment requirements. - */ -#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) - -#endif /* !defined(SIMDE_ALIGN_H) */ -/* :: End simde/simde-align.h :: */ - -/* In some situations, SIMDe has to make large performance sacrifices - * for small increases in how faithfully it reproduces an API, but - * only a relatively small number of users will actually need the API - * to be completely accurate. The SIMDE_FAST_* options can be used to - * disable these trade-offs. - * - * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or - * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to - * enable some optimizations. Using -ffast-math and/or - * -ffinite-math-only will also enable the relevant options. If you - * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
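SIMDE_ALIGN_CAST above suppresses -Wcast-align around a single reinterpret_cast with a push/ignored/pop pragma sandwich; a stripped-down sketch of that pattern under GCC/Clang (the macro names are illustrative, and MSVC would use #pragma warning(push/disable/pop) instead):

#include <cstdio>

#define DISABLE_CAST_ALIGN_BEGIN \
  _Pragma("GCC diagnostic push") \
  _Pragma("GCC diagnostic ignored \"-Wcast-align\"")
#define DISABLE_CAST_ALIGN_END _Pragma("GCC diagnostic pop")

int main() {
  char buf[16] = {0};
  DISABLE_CAST_ALIGN_BEGIN
  // Forming the pointer is what -Wcast-align complains about; the pragmas
  // silence that one diagnostic without disabling it globally.
  const int* p = reinterpret_cast<const int*>(buf);
  DISABLE_CAST_ALIGN_END
  std::printf("%p\n", static_cast<const void*>(p));
  return 0;
}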
*/ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) - #define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) - #if defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_NANS - #elif defined(__FINITE_MATH_ONLY__) - #if __FINITE_MATH_ONLY__ - #define SIMDE_FAST_NANS - #endif - #endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_CONVERSION_RANGE -#endif - -/* Due to differences across platforms, sometimes it can be much - * faster for us to allow spurious floating point exceptions, - * or to no generate them when we should. 
*/ -#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_EXCEPTIONS -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) - #if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ - (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) - #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") - #else - #define SIMDE_REQUIRE_CONSTANT(arg) - #endif -#else - #define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) - /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which - * starts with a double-underscore. This is a system header so we have no - * control over it, but since it's a macro it will emit a diagnostic which - * prevents compilation with -Werror. */ - #if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ - _Static_assert(expr, message); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) - #endif -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) - #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#endif - -/* Statement exprs */ -#if \ - HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ - HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) -#endif - -/* This is just a convenience macro to make it easy to call a single - * function with a specific diagnostic disabled. 
*/ -#if defined(SIMDE_STATEMENT_EXPR_) - #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ - SIMDE_STATEMENT_EXPR_(({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - diagnostic \ - (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#endif - -#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) - #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") -#endif - -#if \ - (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) -# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -# define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -# if \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SCALAR -# define SIMDE_VECTOR_SUBSCRIPT -# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -# define SIMDE_VECTOR_SUBSCRIPT -# elif \ - HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# elif HEDLEY_HAS_ATTRIBUTE(vector_size) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SUBSCRIPT -# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) -# define SIMDE_VECTOR_SCALAR -# endif -# endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) - HEDLEY_DIAGNOSTIC_PUSH - /* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -# endif -# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif - -# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#else -# define SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_SAFELEN(l) -# define SIMDE_VECTORIZE_REDUCTION(r) -# define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if defined(SIMDE_NO_INLINE) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#elif defined(SIMDE_CONSTRAINED_COMPILATION) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static -#else -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if \ - HEDLEY_HAS_ATTRIBUTE(unused) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ - -#if defined(_MSC_VER) -# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -# define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -# define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -# define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -# define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# elif defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__s390x__) || defined(__zarch__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. 
*/ -# elif defined(_WIN32) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(sun) || defined(__sun) /* Solaris */ -# include -# if defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__APPLE__) -# include -# if defined(__LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -# include -# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -# include -# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# endif -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) - #define simde_bswap64(v) _byteswap_uint64(v) -#else - SIMDE_FUNCTION_ATTRIBUTES - uint64_t - simde_bswap64(uint64_t v) { - return - ((v & (((uint64_t) 0xff) << 56)) >> 56) | - ((v & (((uint64_t) 0xff) << 48)) >> 40) | - ((v & (((uint64_t) 0xff) << 40)) >> 24) | - ((v & (((uint64_t) 0xff) << 32)) >> 8) | - ((v & (((uint64_t) 0xff) << 24)) << 8) | - ((v & (((uint64_t) 0xff) << 16)) << 24) | - ((v & (((uint64_t) 0xff) << 8)) << 40) | - ((v & (((uint64_t) 0xff) )) << 56); - } -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -# error Unknown byte order; please file a bug -#else -# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -# define simde_endian_bswap64_be(value) simde_bswap64(value) -# define simde_endian_bswap64_le(value) (value) -# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -# define simde_endian_bswap64_be(value) (value) -# define simde_endian_bswap64_le(value) simde_bswap64(value) -# endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. 
*/ - -#if !defined(SIMDE_FLOAT32_TYPE) -# define SIMDE_FLOAT32_TYPE float -# define SIMDE_FLOAT32_C(value) value##f -#else -# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -# define SIMDE_FLOAT64_TYPE double -# define SIMDE_FLOAT64_C(value) value -#else -# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if defined(SIMDE_POLY8_TYPE) -# undef SIMDE_POLY8_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY8_TYPE poly8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value)) -#else -# define SIMDE_POLY8_TYPE uint8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value)) -#endif -typedef SIMDE_POLY8_TYPE simde_poly8; - -#if defined(SIMDE_POLY16_TYPE) -# undef SIMDE_POLY16_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY16_TYPE poly16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value)) -#else -# define SIMDE_POLY16_TYPE uint16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value)) -#endif -typedef SIMDE_POLY16_TYPE simde_poly16; - -#if defined(SIMDE_POLY64_TYPE) -# undef SIMDE_POLY64_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_POLY64_TYPE poly64_t -# define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull)) -#else -# define SIMDE_POLY64_TYPE uint64_t -# define SIMDE_POLY64_C(value) value ## ull -#endif -typedef SIMDE_POLY64_TYPE simde_poly64; - -#if defined(SIMDE_POLY128_TYPE) -# undef SIMDE_POLY128_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) -# define SIMDE_POLY128_TYPE poly128_t -# define SIMDE_POLY128_C(value) value -#elif defined(__SIZEOF_INT128__) -# define SIMDE_POLY128_TYPE __int128 -# define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value)) -#else -# define SIMDE_POLY128_TYPE uint64_t -# define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1 -#endif -typedef SIMDE_POLY128_TYPE simde_poly128; - -#if defined(__cplusplus) - typedef bool simde_bool; -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - typedef _Bool simde_bool; -#elif defined(bool) - typedef bool simde_bool; -#else - #include - typedef bool simde_bool; -#endif - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -# define SIMDE_CONVERT_FTOI(T,v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) \ - HEDLEY_DIAGNOSTIC_POP -#else -# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) -#else - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -# define 
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -# define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -# define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -# if \ - defined(HEDLEY_PGI_VERSION) || \ - defined(HEDLEY_MSVC_VERSION) -# define SIMDE_STDC_HOSTED 1 -# else -# define SIMDE_STDC_HOSTED 0 -# endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) - #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) - #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) - #endif -#endif -#if !defined(simde_memset) - #if HEDLEY_HAS_BUILTIN(__builtin_memset) - #define simde_memset(s, c, n) __builtin_memset(s, c, n) - #endif -#endif -#if !defined(simde_memcmp) - #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) - #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) - #endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) - #if !defined(SIMDE_NO_STRING_H) - #if defined(__has_include) - #if !__has_include() - #define SIMDE_NO_STRING_H - #endif - #elif (SIMDE_STDC_HOSTED == 0) - #define SIMDE_NO_STRING_H - #endif - #endif - - #if !defined(SIMDE_NO_STRING_H) - #include - #if !defined(simde_memcpy) - #define simde_memcpy(dest, src, n) memcpy(dest, src, n) - #endif - #if !defined(simde_memset) - #define simde_memset(s, c, n) memset(s, c, n) - #endif - #if !defined(simde_memcmp) - #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) - #endif - #else - /* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. 
*/ - #if !defined(simde_memcpy) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memcpy_(void* dest, const void* src, size_t len) { - char* dest_ = HEDLEY_STATIC_CAST(char*, dest); - char* src_ = HEDLEY_STATIC_CAST(const char*, src); - for (size_t i = 0 ; i < len ; i++) { - dest_[i] = src_[i]; - } - } - #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) - #endif - - #if !defined(simde_memset) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memset_(void* s, int c, size_t len) { - char* s_ = HEDLEY_STATIC_CAST(char*, s); - char c_ = HEDLEY_STATIC_CAST(char, c); - for (size_t i = 0 ; i < len ; i++) { - s_[i] = c_[i]; - } - } - #define simde_memset(s, c, n) simde_memset_(s, c, n) - #endif - - #if !defined(simde_memcmp) - SIMDE_FUCTION_ATTRIBUTES - int - simde_memcmp_(const void *s1, const void *s2, size_t n) { - unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); - unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); - for (size_t i = 0 ; i < len ; i++) { - if (s1_[i] != s2_[i]) { - return (int) (s1_[i] - s2_[i]); - } - } - return 0; - } - #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) - #endif - #endif -#endif - -/*** Functions that quiet a signaling NaN ***/ - -static HEDLEY_INLINE -double -simde_math_quiet(double x) { - uint64_t tmp, mask; - if (!simde_math_isnan(x)) { - return x; - } - simde_memcpy(&tmp, &x, 8); - mask = 0x7ff80000; - mask <<= 32; - tmp |= mask; - simde_memcpy(&x, &tmp, 8); - return x; -} - -static HEDLEY_INLINE -float -simde_math_quietf(float x) { - uint32_t tmp; - if (!simde_math_isnanf(x)) { - return x; - } - simde_memcpy(&tmp, &x, 4); - tmp |= 0x7fc00000lu; - simde_memcpy(&x, &tmp, 4); - return x; -} - -#if defined(FE_ALL_EXCEPT) - #define SIMDE_HAVE_FENV_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_FENV_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_FENV_H -#endif - -#if defined(EXIT_FAILURE) - #define SIMDE_HAVE_STDLIB_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_STDLIB_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_STDLIB_H -#endif - -#if defined(__has_include) -# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -# include -# elif __has_include() -# include -# endif -# if __has_include() -# include -# endif -#elif SIMDE_STDC_HOSTED == 1 -# include -# include -#endif - -#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ - static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ - T_To \ - Name (T_From value) { \ - T_To r; \ - simde_memcpy(&r, &value, sizeof(r)); \ - return r; \ - } - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/check.h :: */ -/* Check (assertions) - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -#if !defined(_WIN32) -# define SIMDE_SIZE_MODIFIER "z" -# define SIMDE_CHAR_MODIFIER "hh" -# define SIMDE_SHORT_MODIFIER "h" -#else -# if defined(_M_X64) || defined(__amd64__) -# define SIMDE_SIZE_MODIFIER "I64" -# else -# define SIMDE_SIZE_MODIFIER "" -# endif -# define SIMDE_CHAR_MODIFIER "" -# define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) -# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ -# define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -# if defined(__has_include) -# if __has_include() -# include -# endif -# elif defined(SIMDE_STDC_HOSTED) -# if SIMDE_STDC_HOSTED == 1 -# include -# endif -# elif defined(__STDC_HOSTED__) -# if __STDC_HOSTETD__ == 1 -# include -# endif -# endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/debug-trap.h :: */ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define simde_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define simde_trap() __debugbreak() -# endif -#endif -#if !defined(simde_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define simde_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define simde_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define simde_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void simde_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define simde_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define simde_trap() raise(SIGTRAP) -# else -# define simde_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -# define simde_dbg_assert(expr) do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -# define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ -/* :: End simde/debug-trap.h :: */ - - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -# if defined(EOF) -# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) -# else -# define simde_errorf(format, ...) (simde_trap()) -# endif - HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -# if defined(SIMDE_CHECK_FAIL_DEFINED) -# define simde_assert(expr) -# else -# if defined(HEDLEY_ASSUME) -# define simde_assert(expr) HEDLEY_ASSUME(expr) -# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) -# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) -# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) -# define simde_assert(expr) __assume(expr) -# else -# define simde_assert(expr) -# endif -# endif -# define simde_assert_true(expr) simde_assert(expr) -# define simde_assert_false(expr) simde_assert(!(expr)) -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) -# define simde_assert_double_equal(a, b, precision) -# define simde_assert_string_equal(a, b) -# define simde_assert_string_not_equal(a, b) -# define simde_assert_memory_equal(size, a, b) -# define simde_assert_memory_not_equal(size, a, b) -#else -# define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ - -(simde_tmp_a_ - simde_tmp_b_) : \ - (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# include -# define simde_assert_string_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ - simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_string_not_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ - simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ - simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) \ - simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) \ - simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) \ - simde_assert_type(float, "f", a, op, b) 
-#define simde_assert_double(a, op, b) \ - simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void*, "p", a, op, b) - -#define simde_assert_int8(a, op, b) \ - simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) \ - simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) \ - simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) \ - simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ -/* :: End simde/check.h :: */ - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether the operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * we use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long lonsg are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long. 
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. 
- * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - #include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; - #endif - - #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; - #endif - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; - #endif - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; - #endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde__m64_from_private(simde__m64_private v) { - simde__m64 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64_private -simde__m64_to_private(simde__m64 v) { - simde__m64_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ - SIMDE_FUNCTION_ATTRIBUTES \ - simde__##simde_type \ - simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ - simde__##simde_type##_private r_; \ - r_.isax##_##fragment = value; \ - return simde__##simde_type##_from_private(r_); \ - } \ - \ - SIMDE_FUNCTION_ATTRIBUTES \ - source_type \ - simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ - simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ - return r_.isax##_##fragment; \ - } - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) -#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) -# define _m_paddb(a, b) simde_m_paddb(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_add_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) -# define _m_paddw(a, b) simde_mm_add_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) -# define _m_paddd(a, b) simde_mm_add_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { - r_.i8[i] = INT8_MAX; - } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { - r_.i8[i] = INT8_MIN; - } else { - r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) -# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, 
b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) -# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_and_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - r_.i64[0] = a_.i64[0] & b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -# define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = 
simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) -# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) -# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) -# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) -# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) -# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) -# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtm64_si64 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtm64_si64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i64[0]; - #endif - #endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -# define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi32_si64 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[2] = { a, 0 }; - r_.neon_i32 = vld1_s32(av); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -# define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi64_m64 (int64_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtsi64_m64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); - #else - r_.i64[0] = a; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi64_si32 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_empty (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); - #else - /* noop */ - #endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_empty() simde_mm_empty() -# define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_or_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - r_.i64[0] = a_.i64[0] | b_.i64[0]; 
- #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -# define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to 
UINT8_MAX */ - const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); - - /* Elements which are within the acceptable range */ - const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); - const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); - - /* Final values as 16-bit integers */ - const int16x8_t values = vorrq_s16(le_max, gt_max); - - r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else if (a_.i16[i] < 0) { - r_.u8[i] = 0; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] > UINT8_MAX) { - r_.u8[i + 4] = UINT8_MAX; - } else if (b_.i16[i] < 0) { - r_.u8[i + 4] = 0; - } else { - r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) -# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i8 = vld1_s8(v); - #else - r_.i8[0] = e0; - r_.i8[1] = e1; - r_.i8[2] = e2; - r_.i8[3] = e3; - r_.i8[4] = e4; - r_.i8[5] = e5; - r_.i8[6] = e6; - r_.i8[7] = e7; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m64_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi8( - HEDLEY_STATIC_CAST(int8_t, e7), - HEDLEY_STATIC_CAST(int8_t, e6), - HEDLEY_STATIC_CAST(int8_t, e5), - HEDLEY_STATIC_CAST(int8_t, e4), - HEDLEY_STATIC_CAST(int8_t, e3), - HEDLEY_STATIC_CAST(int8_t, e2), - HEDLEY_STATIC_CAST(int8_t, e1), - HEDLEY_STATIC_CAST(int8_t, e0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u8 = vld1_u8(v); - #else - r_.u8[0] = e0; - r_.u8[1] = e1; - r_.u8[2] = e2; - r_.u8[3] = e3; - r_.u8[4] = e4; - r_.u8[5] = e5; - r_.u8[6] = e6; - r_.u8[7] = e7; - #endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi16(e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; - r_.neon_i16 = vld1_s16(v); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - #endif - - return 
simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi16( - HEDLEY_STATIC_CAST(int16_t, e3), - HEDLEY_STATIC_CAST(int16_t, e2), - HEDLEY_STATIC_CAST(int16_t, e1), - HEDLEY_STATIC_CAST(int16_t, e0) - ); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; - r_.neon_u16 = vld1_u16(v); -#else - r_.u16[0] = e0; - r_.u16[1] = e1; - r_.u16[2] = e2; - r_.u16[3] = e3; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32( - HEDLEY_STATIC_CAST(int32_t, e1), - HEDLEY_STATIC_CAST(int32_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; - r_.neon_u32 = vld1_u32(v); -#else - r_.u32[0] = e0; - r_.u32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi32 (int32_t e1, int32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32(e1, e0); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; - r_.neon_i32 = vld1_s32(v); -#else - r_.i32[0] = e0; - r_.i32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pi64 (int64_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; - r_.neon_i64 = vld1_s64(v); -#else - r_.i64[0] = e0; -#endif - - return simde__m64_from_private(r_); -} - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; - r_.neon_f32 = vld1_f32(v); -#else - r_.f32[0] = e0; - r_.f32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi8 (int8_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi8(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i8 = vmov_n_s8(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi8(a, a, a, a, a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi16 (int16_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi16(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i16 = vmov_n_s16(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi16(a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi32 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi32(a); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i32 = vmov_n_s32(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi16(e3, e2, e1, e0); - #else - return simde_mm_set_pi16(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi32 (int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi32(e1, e0); - #else - return simde_mm_set_pi32(e0, e1); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setzero_si64 (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setzero_si64(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_u32 = vmov_n_u32(0); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(0, 0); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_si64() simde_mm_setzero_si64() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_load_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_loadu_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(mem_addr, &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_setone_si64 (void) { - return simde_mm_set1_pi32(~INT32_C(0)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) 
- return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) -# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) -# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psllh_s(a_.mmi_i16, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) -# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi32(a, count); - #else - simde__m64_private r_; - 
simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) -# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_slli_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); - #else - r_.u64[0] = a_.u64[0] << count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) -# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 << count_.i64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] << count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) -# define _m_psllq(a, count) simde_mm_sll_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) - return simde_mm_setzero_si64(); - - r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { - r_.u16[i] = a_.u16[i] >> count_.u64[0]; - } - 
#endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) -# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { - r_.u32[i] = a_.u32[i] >> count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) -# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) -# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) -# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_si64(a, count); 
- #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> count; - #else - r_.u64[0] = a_.u64[0] >> count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) -# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 >> count_.u64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] >> count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) -# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrah_s(a_.mmi_i16, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) -# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psraw_s(a_.mmi_i32, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) 
-[Deleted: remainder of the vendored SIMDE (SIMD Everywhere) portability headers, comprising the tail of simde/x86/mmx.h (the remaining simde_mm_* MMX shift/sub/unpack/xor wrappers), all of simde/simde-f16.h (half-precision float support), and the opening of the SSE portability header (the simde__m128 type, rounding and exception-mask constants, and the _mm_*_ps wrappers). These are auto-generated third-party files ("AUTOMATICALLY GENERATED FILE, DO NOT MODIFY") removed wholesale from src/.]
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpge_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpge_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpgt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpgt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vandq_u32(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpunord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpunord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] == b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] >= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] > b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] <= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] < b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] != b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { - simde__m128_private - r_, - dest_ = simde__m128_to_private(dest), - src_ = simde__m128_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); - r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t sign_pos = wasm_f32x4_splat(-0.0f); - r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); - #else - r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); - r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; - r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); - #elif defined(SIMDE_IEEE754_STORAGE) - (void) src_; - (void) dest_; - simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); - r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { - return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_pi2ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); - #else - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_si2ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - r_.i32[1] = a_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvt_ss2si (simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_ss2si(a); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); - #else - simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.i16[i]; - r_.f32[i] = v; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32x2_ps(a, b); - #else - simde__m128_private r_; - simde__m64_private - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); - SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); - #else - r_.f32[0] = (simde_float32) a_.i32[0]; - r_.f32[1] = (simde_float32) a_.i32[1]; - r_.f32[2] = (simde_float32) b_.i32[0]; - r_.f32[3] = (simde_float32) b_.i32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_cvtpi8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); - r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); - r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi16 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi16(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi32(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi8 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi8(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) - /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to - * i16, combine with an all-zero vector of i16 (which will become the upper - * half), narrow to i8. 
*/ - float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); - float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); - float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); - r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) - r_.i8[i] = INT8_MAX; - else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) - r_.i8[i] = INT8_MIN; - else - r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); - } - /* Note: the upper half is undefined */ - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.u16[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtsi32_ss(a, b); - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_ss(a, b); - #else - return _mm_cvtsi64x_ss(a, b); - #endif - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - 
return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm_cvtss_f32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtss_f32(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_f32(a_.neon_f32, 0); - #else - return a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtss_si32 (simde__m128 a) { - return simde_mm_cvt_ss2si(a); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtss_si64(a); - #else - return _mm_cvtss_si64x(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); - #else - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) -# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtt_ss2si (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtt_ss2si(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - simde_float32 v = a_.f32[0]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, v); - #endif - #endif - #endif -} -#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) -# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) - #if defined(__PGI) - return _mm_cvttss_si64x(a); - #else - return _mm_cvttss_si64(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); - float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); - r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) - r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = a_.f32[0] / b_.f32[0]; - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_mm_extract_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private a_ = simde__m64_to_private(a); - return a_.i16[imm8]; -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) -#endif -#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) -# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private - a_ = simde__m64_to_private(a); - - a_.i16[imm8] = i; - - return simde__m64_from_private(a_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) -#endif -#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps(mem_addr); -#else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); - #endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load1_ps (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps1(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_dup_f32(mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); - #else - r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ss (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ss(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); - #else - r_.f32[0] = *mem_addr; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); - #else - simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -/* The SSE documentation says that there are no alignment requirements - for mem_addr. Unfortunately they used the __m64 type for the argument - which is supposed to be 8-byte aligned, so some compilers (like clang - with -Wcast-align) will generate a warning if you try to cast, say, - a simde_float32* to a simde__m64* for this function. - - I think the choice of argument type is unfortunate, but I do think we - need to stick to it here. 
If there is demand I can always add something - like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vld1_f32( - HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); - #else - simde__m64_private b_; - simde_memcpy(&b_, mem_addr, sizeof(b_)); - r_.i32[0] = b_.i32[0]; - r_.i32[1] = b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadr_ps(mem_addr); - #else - simde__m128_private - r_, - v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrev64q_f32(v_.neon_f32); - r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_reve(v_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); - #else - r_.f32[0] = v_.f32[3]; - r_.f32[1] = v_.f32[2]; - r_.f32[2] = v_.f32[1]; - r_.f32[3] = v_.f32[0]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadu_ps(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m64_private - a_ = simde__m64_to_private(a), - mask_ = simde__m64_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) - if (mask_.i8[i] < 0) - mem_addr[i] = a_.i8[i]; - #endif -} -#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) -# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) - r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); - #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) - r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) -# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) -# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - #if defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); - #else - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); - #endif - #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); - r_.f32 = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.f32), - ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | - (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) - ) - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) -# define _m_pminub(a, b) simde_mm_min_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movehl_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vzip2q_u64(b_.neon_u64, a_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a32 = vget_high_f32(a_.neon_f32); - float32x2_t b32 = vget_high_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(b32, a32); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergel(b_.altivec_i64, a_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); - #else - r_.f32[0] = b_.f32[2]; - r_.f32[1] = b_.f32[3]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movelh_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = 
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
[Vendored SIMDE header deletion, continued: the remainder of simde/x86/sse.h — the simde_mm_ucomi{eq,ge,gt,le,lt,neq}_ss scalar comparisons, simde_mm_unpackhi_ps / simde_mm_unpacklo_ps, simde_mm_stream_pi / simde_mm_stream_ps, and the SIMDE_MM_TRANSPOSE4_PS macro — followed by the opening of the nested auto-generated simde/x86/sse4.1.h, ssse3.h, sse3.h, and sse2.h headers: their MIT license blocks, the simde__m128i / simde__m128d private unions and typedefs, size/alignment static asserts, NEON/AltiVec/WASM conversion helpers, and the first SSE2 wrappers (simde_mm_set_pd, simde_mm_set1_pd, simde_x_mm_abs_pd / _not_pd / _select_pd, simde_mm_add_epi8/16/32/64, simde_mm_add_pd, simde_mm_move_sd, simde_x_mm_broadcastlow_pd, simde_mm_add_sd, simde_mm_add_si64, simde_mm_adds_epi8/epi16/epu8/epu16, simde_mm_and_pd, simde_mm_and_si128, simde_mm_andnot_pd, simde_mm_andnot_si128, simde_mm_xor_pd, simde_mm_avg_epu8/epu16, simde_mm_setzero_si128, simde_mm_bslli_si128 / simde_mm_bsrli_si128, simde_mm_clflush, and simde_mm_comi{eq,ge,gt,le,lt,neq}_sd).]
- b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] != b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { - simde__m128d_private - r_, - dest_ = simde__m128d_to_private(dest), - src_ = simde__m128d_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); - #else - simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); - uint64_t u64_nz; - simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); - uint64x2_t sign_pos = vdupq_n_u64(u64_nz); - #endif - r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); - #else - r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); - #endif - #elif defined(simde_math_copysign) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); - } - #else - simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); - return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { - return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_castpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_ps(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f32_f64(a); - #else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castpd_si128 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_si128(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_s64_f64(a); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_castps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_pd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_f32(a); - #else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castps_pd(a) simde_mm_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castps_si128 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_si128(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif 
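For orientation, a minimal standalone sketch (illustrative only, not part of the diff) of the byte-shift semantics the removed simde_mm_bsrli_si128 scalar fallback implements: the 128-bit value is shifted right by imm8 whole bytes, the vacated high bytes are zero-filled, and any count above 15 yields all zeros.

// Sketch of the removed scalar fallback's behaviour; names are illustrative.
#include <array>
#include <cstdint>
#include <cstdio>

std::array<uint8_t, 16> bsrli_si128(const std::array<uint8_t, 16>& a, int imm8) {
    std::array<uint8_t, 16> r{};                 // zero-initialised result
    if (imm8 < 0 || imm8 > 15) return r;         // out-of-range count -> all zeros
    for (int i = 0; i < 16; ++i) {
        int e = i + imm8;
        r[i] = (e < 16) ? a[e] : 0;              // same rule as the deleted scalar loop
    }
    return r;
}

int main() {
    std::array<uint8_t, 16> a{};
    for (int i = 0; i < 16; ++i) a[i] = uint8_t(i + 1);
    auto r = bsrli_si128(a, 4);                  // bytes 5..16 move down; top 4 bytes become 0
    for (uint8_t b : r) std::printf("%d ", int(b));
    std::printf("\n");
    return 0;
}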
[Removed, continued: the remaining bit-casts _mm_castsi128_pd and _mm_castsi128_ps; the packed integer compares _mm_cmpeq, _mm_cmplt and _mm_cmpgt for epi8, epi16 and epi32; the packed and scalar double compares _mm_cmpeq, _mm_cmpneq, _mm_cmplt, _mm_cmple, _mm_cmpgt and _mm_cmpge for pd and sd, together with the negated _mm_cmpngt, _mm_cmpnge, _mm_cmpnlt and _mm_cmpnle forms (implemented as the complementary predicate); _mm_cvtsd_f64; and the NaN-aware _mm_cmpord_pd/sd and _mm_cmpunord_pd/sd. Each dispatches to native SSE2, NEON, WASM SIMD128 or AltiVec/z13 where available, with a scalar loop that writes an all-ones or all-zero mask per lane otherwise.]
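A minimal sketch (illustrative, not from the diff) of the per-lane mask convention the removed packed-double compare emulations follow: each double lane is compared independently and yields all-one bits when the predicate holds and all-zero bits otherwise, so the result can be used directly as a blend or select mask.

// Scalar rendering of the removed _mm_cmplt_pd fallback; types are illustrative.
#include <cstdint>
#include <cstdio>

struct vec2d  { double   f64[2]; };
struct mask2d { uint64_t u64[2]; };

mask2d cmplt_pd_scalar(const vec2d& a, const vec2d& b) {
    mask2d r{};
    for (int i = 0; i < 2; ++i)
        r.u64[i] = (a.f64[i] < b.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
    return r;
}

int main() {
    vec2d a{{1.0, 5.0}}, b{{2.0, 3.0}};
    mask2d m = cmplt_pd_scalar(a, b);            // lane 0: 1 < 2 -> all ones; lane 1: 5 < 3 -> all zeros
    std::printf("%016llx %016llx\n",
                (unsigned long long)m.u64[0],
                (unsigned long long)m.u64[1]);
    return 0;
}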
[Removed, continued: the conversions _mm_cvtepi32_pd/ps, _mm_cvtpd_pi32/epi32/ps, _mm_cvtpi32_pd, _mm_cvtps_epi32/pd, _mm_cvtsd_si32/si64/ss, simde_x_mm_cvtsi128_si16, _mm_cvtsi128_si32/si64, _mm_cvtsi32_sd/si128, simde_x_mm_cvtsi16_si128, _mm_cvtsi64_sd/si128, _mm_cvtss_sd and the truncating _mm_cvttpd_pi32/epi32, _mm_cvttps_epi32 and _mm_cvttsd_si32/si64; the divisions _mm_div_pd and _mm_div_sd; and the start of _mm_extract_epi16, which continues below. The non-native conversion paths clamp out-of-range and NaN inputs to INT32_MIN unless SIMDE_FAST_CONVERSION_RANGE / SIMDE_FAST_NANS are defined.]
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m128i_private a_ = simde__m128i_to_private(a); - a_.i16[imm8 & 7] = i; - return simde__m128i_from_private(a_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load1_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load1_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); - #else - return simde_mm_set1_pd(*mem_addr); - #endif -} -#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) - #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_sd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_sd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = UINT64_C(0); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_load_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadh_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); - #else - simde_float64 t; - - simde_memcpy(&t, mem_addr, sizeof(t)); - r_.f64[0] = a_.f64[0]; - r_.f64[1] = t; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_epi64(mem_addr); - #else - simde__m128i_private r_; - - int64_t value; - simde_memcpy(&value, mem_addr, sizeof(value)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); - #else - r_.i64[0] = value; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vld1_f64( - HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = a_.u64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadr_pd(mem_addr); - #else - simde__m128d_private - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); - r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t tmp = 
wasm_v128_load(mem_addr); - r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); - #else - r_.f64[0] = mem_addr[1]; - r_.f64[1] = mem_addr[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld1q_f64(mem_addr); - #else - simde__m128d_private r_; - - simde_memcpy(&r_, mem_addr, sizeof(r_)); - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi8 - #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi16 - #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi32(void const * mem_addr) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi32 - #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi64 - #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si128 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); - #else - simde__m128i_private r_; - - #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_PACKED_ - struct simde_mm_loadu_si128_s { - __typeof__(r_) v; - } __attribute__((__packed__, __may_alias__)); - r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_madd_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpaddq_s32(pl, ph); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); - int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); - int32x2_t rh = 
vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); - r_.neon_i32 = vcombine_s32(rl, rh); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) a32, b32, p32; - SIMDE_CONVERT_VECTOR_(a32, a_.i16); - SIMDE_CONVERT_VECTOR_(b32, b_.i16); - p32 = a32 * b32; - r_.i32 = - __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + - __builtin_shufflevector(p32, p32, 1, 3, 5, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - if (mask_.u8[i] & 0x80) { - mem_addr[i] = a_.i8[i]; - } - } - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) - /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ - return _mm_movemask_epi8(a); - #else - int32_t r = 0; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ - static const uint8_t md[16] = { - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - }; - - /* Extend sign bit over entire lane */ - uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); - /* Clear all but the bit we're interested in. 
*/ - uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); - /* Alternate bytes from low half and high half */ - uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); - uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vaddvq_u16(x); - #else - uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[15 - i] >> 7) << (15 - i); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_movemask_pd(a); - #else - int32_t r = 0; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + - (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 
-simde_mm_movepi64_pi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movepi64_pi64(a); - #else - simde__m64_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i64 = vget_low_s64(a_.neon_i64); - #else - r_.i64[0] = a_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_movpi64_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movpi64_epi64(a); - #else - simde__m128i_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_move_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_move_epi64(a) simde_mm_move_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t a_lo = vmovn_u64(a_.neon_u64); - uint32x2_t b_lo = vmovn_u64(b_.neon_u64); - r_.neon_u64 = vmull_u32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( - wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), - wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(a_.u32) z = { 0, }; - a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); - b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * - HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 * b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] * b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i64 = a_.i64 % b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_pd(a, b); - #else - simde__m128d_private - r_, 
- a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] * b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_mul_su32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); - #else - r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mulhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a3210 = vget_low_s16(a_.neon_i16); - int16x4_t b3210 = vget_low_s16(b_.neon_i16); - int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); - #else - int16x4_t a7654 = vget_high_s16(a_.neon_i16); - int16x4_t b7654 = vget_high_s16(b_.neon_i16); - int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); - r_.neon_u16 = rv.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); - 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_mulhi_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t a3210 = vget_low_u16(a_.neon_u16); - uint16x4_t b3210 = vget_low_u16(b_.neon_u16); - uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); - r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - #else - uint16x4_t a7654 = vget_high_u16(a_.neon_u16); - uint16x4_t b7654 = vget_high_u16(b_.neon_u16); - uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - r_.neon_u16 = neon_r.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); - r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mullo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_or_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - 
r_.i32f = a_.i32f | b_.i32f; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_or_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi16(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; - const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; - - int16_t m SIMDE_VECTOR(32); - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); - v = (v & ~m) | (min & m); - - m = v > max; - v = (v & ~m) | (max & m); - - SIMDE_CONVERT_VECTOR_(r_.i8, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; - r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi32(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; - const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; - - int32_t m SIMDE_VECTOR(32); - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); - v = (v & ~m) | (min & m); - - m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); - v = (v & ~m) | (max & m); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packus_epi16(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); - #else - r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = - vcombine_u8( - vqmovun_s16(a_.neon_i16), - vqmovun_s16(b_.neon_i16) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - - v &= ~(v >> 15); - v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); - - SIMDE_CONVERT_VECTOR_(r_.i8, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; - r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_pause (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_pause(); - #elif defined(SIMDE_ARCH_X86) - __asm__ __volatile__("pause"); - #elif defined(SIMDE_ARCH_ARM_NEON) - #if defined(_MSC_VER) - __isb(_ARM64_BARRIER_SY); - #else - __asm__ __volatile__("isb\n"); - #endif - #elif defined(SIMDE_ARCH_POWER) - __asm__ __volatile__ ("or 27,27,27" ::: "memory"); - #elif defined(SIMDE_ARCH_WASM) - __asm__ __volatile__ ("nop"); - #elif defined(HEDLEY_GCC_VERSION) - #if defined(SIMDE_ARCH_RISCV) - __builtin_riscv_pause(); - #else - __asm__ __volatile__ ("nop" ::: "memory"); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_pause() (simde_mm_pause()) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sad_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); - r_.neon_u64 = vcombine_u64( - vpaddl_u32(vpaddl_u16(vget_low_u16(t))), - vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - uint16_t tmp = 0; - SIMDE_VECTORIZE_REDUCTION(+:tmp) - for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { - const size_t e = j + (i * 8); - tmp += (a_.u8[e] > b_.u8[e]) ? 
[Deleted vendored SIMDE SSE2 emulation header, elided for brevity. The removed hunk contains the portable implementations (x86 native, ARM NEON, WASM SIMD128, POWER AltiVec, and plain scalar fallbacks) of the _mm_set_epi8/16/32/64x, _mm_set1_*, _mm_setr_*, _mm_loadu_si16/32/64, _mm_shuffle_epi32, _mm_shuffle_pd, _mm_shufflehi_epi16, _mm_shufflelo_epi16 intrinsics, the _mm_sll/_mm_srl/_mm_sra/_mm_slli/_mm_srli/_mm_srai shift families, _mm_sqrt_pd/sd, the _mm_store*/_mm_storeu*/_mm_stream* store families, and the _mm_sub_epi8/16/32/64, _mm_sub_pd, _mm_sub_sd subtraction intrinsics, together with their SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES macro aliases. The entire vendored header is deleted in this change.]
-simde__m64 -simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] - b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] == b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] == b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] >= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] >= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > 
wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] > b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] > b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] <= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] <= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] < b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] < b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] != b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] != b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_lfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_lfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_lfence() simde_mm_lfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_mfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_mfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mfence() simde_mm_mfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_high_s16(a_.neon_i16); - int16x4_t b1 = vget_high_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi16(a, b) 
simde_mm_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_high_s32(a_.neon_i32); - int32x2_t b1 = vget_high_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_h = vget_high_s64(a_.neon_i64); - int64x1_t b_h = vget_high_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_h, b_h); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi8 (simde__m128i a, 
simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i]; - r_.i8[(i * 2) + 1] = b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_low_s16(a_.neon_i16); - int16x4_t b1 = vget_low_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i]; - r_.i16[(i * 2) + 1] = b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_low_s32(a_.neon_i32); - int32x2_t b1 = vget_low_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i]; - r_.i32[(i * 2) + 1] = b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_l = vget_low_s64(a_.neon_i64); - int64x1_t b_l = vget_low_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_l, b_l); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i]; - r_.i64[(i * 2) + 1] = b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i]; - r_.f64[(i * 2) + 1] = b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_negate_pd(simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - r_.altivec_f64 = vec_neg(a_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vnegq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); - #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_not_si128 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_ternarylogic_epi32(a, a, a, 0x55); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/sse2.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i]; - r_.i16[i + halfway_point] = b_.i16[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i + 1]; - r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi32 
(simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i]; - r_.i32[i + halfway_point] = b_.i32[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i + 1]; - r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i]; - r_.f32[i + halfway_point] = b_.f32[2 * i]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i + 1]; - r_.f32[i + halfway_point] = 
b_.f32[2 * i + 1]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i]; - r_.f64[i + halfway_point] = b_.f64[2 * i]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i + 1]; - r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); - float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); - return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); - #else - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; - r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); - float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); - return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); - #else - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; - r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; - } - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_ps(a, b) simde_mm_addsub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_pd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); - #else - return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); - #else - return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_pd(a, b); - #else - return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); - #else - return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_lddqu_si128(mem_addr); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loaddup_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_loaddup_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(*mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.f64[1] = *mem_addr; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_movedup_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movedup_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - r_.f64[0] = a_.f64[0]; - r_.f64[1] = a_.f64[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehdup_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movehdup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); - #else - r_.f32[0] = a_.f32[1]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_moveldup_ps (simde__m128 a) { - #if defined(SIMDE__SSE3_NATIVE) - return _mm_moveldup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[0]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[2]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE3_H) */ -/* :: End simde/x86/sse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi8(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabsq_s8(a_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_abs(a_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / 
sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vabsq_s16(a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_abs(a_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); - return _mm_sub_epi32(_mm_xor_si128(a, m), m); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vabsq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_abs(a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_PUSH - #pragma warning(disable:4146) - #endif - r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_POP - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi8(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabs_s8(a_.neon_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? 
(- a_.i8[i]) : a_.i8[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi16 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi16(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vabs_s16(a_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi32 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi32(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vabs_s32(a_.neon_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? (- a_.i32[i]) : a_.i32[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - if (HEDLEY_UNLIKELY(count > 31)) - return simde_mm_setzero_si128(); - - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 31) { - r_.i8[i] = 0; - } else if (srcpos > 15) { - r_.i8[i] = a_.i8[(srcpos) & 15]; - } else { - r_.i8[i] = b_.i8[srcpos]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSSE3_NATIVE) - #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_alignr_epi8(a, b, count) \ - ( \ - ((count) > 31) \ - ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ - : ( \ - ((count) > 15) \ - ? 
(simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ - : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) -#endif -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) - #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) - SIMDE_REQUIRE_CONSTANT(count) { - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 15) { - r_.i8[i] = 0; - } else if (srcpos > 7) { - r_.i8[i] = a_.i8[(srcpos) & 7]; - } else { - r_.i8[i] = b_.i8[srcpos]; - } - } - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) -# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_alignr_pi8(a, b, count) \ - ( \ - ((count) > 15) \ - ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ - : ( \ - ((count) > 7) \ - ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ - : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) -#endif -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_shuffle_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Mask out the bits we're not interested in. vtbl will result in 0 - * for any values outside of [0, 15], so if the high bit is set it - * will return 0, just like in SSSE3. 
*/ - b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); - - /* Convert a from an int8x16_t to an int8x8x2_t */ - int8x8x2_t i; - i.val[0] = vget_low_s8(a_.neon_i8); - i.val[1] = vget_high_s8(a_.neon_i8); - - /* Table lookups */ - int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); - int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); - - r_.neon_i8 = vcombine_s8(l, h); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - /* This is a bit ugly because of the casts and the awful type - * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just - * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ - SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; - SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); - SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); - r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_swizzle( - a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); - } - #endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_shuffle_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); - r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); - #else - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadd_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); - #else - return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadd_epi32(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); - return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); - #else - return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadd_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); - #else - r_.i16[0] = a_.i16[0] + a_.i16[1]; - r_.i16[1] = a_.i16[2] + a_.i16[3]; - r_.i16[2] = b_.i16[0] + b_.i16[1]; - r_.i16[3] = b_.i16[2] + b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadd_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[0] + a_.i32[1]; - r_.i32[1] = b_.i32[0] + b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadds_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); - #else - return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadds_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); - #else - for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); - r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; - int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); - r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsub_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); - #else - return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsub_epi32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); - return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); - #else - return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsub_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); - #else - r_.i16[0] = a_.i16[0] - a_.i16[1]; - r_.i16[1] = a_.i16[2] - a_.i16[3]; - r_.i16[2] = b_.i16[0] - b_.i16[1]; - r_.i16[3] = b_.i16[2] - b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsub_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - - SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[0] - a_.i32[1]; - r_.i32[1] = b_.i32[0] - b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsubs_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); - #else - return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); - #else - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); - r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_maddubs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Zero extend a */ - int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); - int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); - - /* Sign extend by shifting left then shifting right. */ - int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); - int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); - - /* multiply */ - int16x8_t prod1 = vmulq_s16(a_even, b_even); - int16x8_t prod2 = vmulq_s16(a_odd, b_odd); - - /* saturated add */ - r_.neon_i16 = vqaddq_s16(prod1, prod2); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_maddubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); - int16x8_t bi = vmovl_s8(b_.neon_i8); - int16x8_t p = vmulq_s16(ai, bi); - int16x4_t l = vget_low_s16(p); - int16x4_t h = vget_high_s16(p); - r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_mulhrs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), - vget_low_s16(b_.neon_i16)); - int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), - vget_high_s16(b_.neon_i16)); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); - int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); - - /* Join together */ - r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); - v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); - const v128_t __inc = wasm_i32x4_splat(0x4000); - __lo = wasm_i32x4_add(__lo, __inc); - __hi = wasm_i32x4_add(__hi, __inc); - __lo = wasm_i32x4_add(__lo, __lo); - __hi = wasm_i32x4_add(__hi, __hi); - r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhrs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - 
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow = vrshrn_n_s32(mul, 15); - - /* Join together */ - r_.neon_i16 = narrow; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); - uint8x16_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s8(b_.neon_i8); - #else - bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); - #endif - bnz_mask = vmvnq_u8(bnz_mask); - - r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); - simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); - uint16x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s16(b_.neon_i16); - #else - bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); - #endif - bnz_mask = vmvnq_u16(bnz_mask); - - r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); - simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); - uint32x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s32(b_.neon_i32); - #else - bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); - #endif - bnz_mask = vmvnq_u32(bnz_mask); - - r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); - simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); - uint8x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s8(b_.neon_i8); - #else - bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); - #endif - bnz_mask = vmvn_u8(bnz_mask); - - r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); - uint16x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s16(b_.neon_i16); - #else - bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); - #endif - bnz_mask = vmvn_u16(bnz_mask); - - r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); - uint32x2_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s32(b_.neon_i32); - #else - bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); - #endif - bnz_mask = vmvn_u32(bnz_mask); - - r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/ssse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) -# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_epi16(a, b, imm8) \ - (__extension__ ({ \ - simde__m128i_private \ - simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ - simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ - simde_mm_blend_epi16_r_; \ - \ - simde_mm_blend_epi16_r_.i16 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 16, 16, \ - simde_mm_blend_epi16_a_.i16, \ - simde_mm_blend_epi16_b_.i16, \ - ((imm8) & (1 << 0)) ? 8 : 0, \ - ((imm8) & (1 << 1)) ? 9 : 1, \ - ((imm8) & (1 << 2)) ? 10 : 2, \ - ((imm8) & (1 << 3)) ? 11 : 3, \ - ((imm8) & (1 << 4)) ? 12 : 4, \ - ((imm8) & (1 << 5)) ? 13 : 5, \ - ((imm8) & (1 << 6)) ? 14 : 6, \ - ((imm8) & (1 << 7)) ? 15 : 7 \ - ); \ - \ - simde__m128i_from_private(simde_mm_blend_epi16_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_epi16 - #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; - } - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_pd(a, b, imm8) \ - (__extension__ ({ \ - simde__m128d_private \ - simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ - simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ - simde_mm_blend_pd_r_; \ - \ - simde_mm_blend_pd_r_.f64 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 64, 16, \ - simde_mm_blend_pd_a_.f64, \ - simde_mm_blend_pd_b_.f64, \ - ((imm8) & (1 << 0)) ? 2 : 0, \ - ((imm8) & (1 << 1)) ? 3 : 1 \ - ); \ - \ - simde__m128d_from_private(simde_mm_blend_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_pd - #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_ps(a, b, imm8) \ - (__extension__ ({ \ - simde__m128_private \ - simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ - simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ - simde_mm_blend_ps_r_; \ - \ - simde_mm_blend_ps_r_.f32 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 32, 16, \ - simde_mm_blend_ps_a_.f32, \ - simde_mm_blend_ps_b_.f32, \ - ((imm8) & (1 << 0)) ? 4 : 0, \ - ((imm8) & (1 << 1)) ? 5 : 1, \ - ((imm8) & (1 << 2)) ? 6 : 2, \ - ((imm8) & (1 << 3)) ? 
7 : 3 \ - ); \ - \ - simde__m128_from_private(simde_mm_blend_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_ps - #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_epi8(a, b, mask); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); - return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Use a signed shift right to create a mask with the sign bit */ - mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); - r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); - #else - mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; - #endif - - r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int8_t m = mask_.i8[i] >> 7; - r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_epi8 - #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE2_NATIVE) - mask = simde_mm_srai_epi16(mask, 15); - return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); - r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i16 = mask_.i16 < z; - #else - mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; - #endif - - r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int16_t m = mask_.i16[i] >> 15; - r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; - mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); - #else - mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; - #endif - - r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - int32_t m = mask_.i32[i] >> 31; - r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i64) z = { 0, 0 }; - mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); - #else - mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; - #endif - - r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - int64_t m = mask_.i64[i] >> 63; - r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_pd - #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_ps - #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_pd (simde__m128d a, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - /* For architectures which lack a current direction SIMD instruction. */ - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndiq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyint) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_nearbyint(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndaq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_roundeven) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_roundeven(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndmq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_floor(a_.f64[i]); - } - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndpq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); - #elif defined(simde_math_ceil) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_ceil(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_trunc(a_.f64[i]); - } - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_pd - #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_pd - #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ps - #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_sd - #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_ss(a, 
b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ss - #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cmpeq_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ - uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); - uint32x4_t swapped = vrev64q_u32(cmp); - r_.neon_u32 = vandq_u32(cmp, swapped); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpeq_epi64 - #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_i16 = s16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, 0, -1, 1, -1, 2, -1, 3, - -1, 4, -1, 5, -1, 6, -1, 7)); - r_.i16 >>= 8; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi16 - #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) 
- __m128i tmp = _mm_unpacklo_epi8(a, a); - tmp = _mm_unpacklo_epi16(tmp, tmp); - return _mm_srai_epi32(tmp, 24); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ - r_.neon_i32 = s32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, 0, -1, -1, -1, 1, - -1, -1, -1, 2, -1, -1, -1, 3)); - r_.i32 >>= 24; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi32 - #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); - r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - /* Disabled on x86 due to lack of 64-bit arithmetic shift until - * until AVX-512 (at which point we would be using the native - * _mm_cvtepi_epi64 anyways). 
*/ - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, -1, -1, -1, -1, 0, - -1, -1, -1, -1, -1, -1, -1, 1)); - r_.i64 >>= 56; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi64 - #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_u16 = u16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 16, 1, 17, 2, 18, 3, 19, - 4, 20, 5, 21, 6, 22, 7, 23)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi16 - #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi32(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ - r_.neon_u32 = u32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 
18, 19, 1, 21, 22, 23, - 2, 25, 26, 27, 3, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi32 - #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi64(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 18, 19, 20, 21, 22, 23, - 1, 25, 26, 27, 28, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi64 - #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); - r_.i32 >>= 16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi32 - #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 1, 11, 2, 13, 3, 15)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi32 - #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 10, 11, - 1, 13, 14, 15)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi64 - #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, - 8, 9, 10, 0, - 12, 13, 14, 1)); - r_.i64 >>= 48; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi64 - #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i tmp = _mm_shuffle_epi32(a, 0x50); - tmp = _mm_srai_epi32(tmp, 31); - tmp = _mm_shuffle_epi32(tmp, 0xed); - return _mm_unpacklo_epi32(a, tmp); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); - r_.i64 >>= 32; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi32_epi64 - #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); - #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u32) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu32_epi64 - #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - - switch (imm8) { - case 0xff: - r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); - break; - case 0x13: - r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); - break; - default: - { /* imm8 is a compile-time constant, so this all becomes just a load */ - uint64_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - - r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); - - { - uint64_t mask_data[] = { - (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - break; - } - #else - simde_float64 sum = SIMDE_FLOAT64_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; - } - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_pd - #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - - switch (imm8) { - case 0xff: - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - case 0x7f: - r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - default: - { - { - uint32_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - - { - uint32_t mask_data[] = { - (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - } - break; - } - #else - simde_float32 sum = SIMDE_FLOAT32_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); - } - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(HEDLEY_MCST_LCC_VERSION) - #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ - SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ - _mm_dp_ps((a), (b), (imm8)); \ - SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ - })) - #else - #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_ps - #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) -#endif - -#if defined(simde_mm_extract_epi8) -# undef simde_mm_extract_epi8 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_mm_extract_epi8 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i8, imm8); - #else - return a_.i8[imm8 & 15]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) -# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi8 - #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) -#endif - -#if defined(simde_mm_extract_epi32) -# undef simde_mm_extract_epi32 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i32, imm8); - #else - return a_.i32[imm8 & 3]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi32 - #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) -#endif - -#if defined(simde_mm_extract_epi64) -# undef simde_mm_extract_epi64 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_extract_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i64, imm8); - #else - return a_.i64[imm8 & 1]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) -#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_extract_epi64 - #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) -#endif - -#if defined(simde_mm_extract_ps) -# undef simde_mm_extract_ps -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128_private - a_ = simde__m128_to_private(a); - - return a_.i32[imm8 & 3]; -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_ps - #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_pd - #define _mm_floor_pd(a) simde_mm_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ps - #define _mm_floor_ps(a) simde_mm_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_floor) - r_.f64[0] = simde_math_floor(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_sd - #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_floor_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_floorf) - r_.f32[0] = simde_math_floorf(b_.f32[0]); - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ss - #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - /* clang-3.8 returns an incompatible type, so we need the cast. MSVC - * can't handle the cast ("error C2440: 'type cast': cannot convert - * from '__m128i' to '__m128i'"). */ - #if defined(__clang__) - #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) - #else - #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi8 - #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(__clang__) - #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) - #else - #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi32 - #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - #if defined(SIMDE_BUG_GCC_94482) - simde__m128i_private - a_ = simde__m128i_to_private(a); - - switch(imm8) { - case 0: - return simde_mm_set_epi64x(a_.i64[1], i); - break; - case 1: - return simde_mm_set_epi64x(i, a_.i64[0]); - break; - default: - HEDLEY_UNREACHABLE(); - break; - } - #else - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i64[imm8] = i; - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_insert_epi64 - #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - float tmp1_ = b_.f32[(imm8 >> 6) & 3]; - a_.f32[(imm8 >> 4) & 3] = tmp1_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_ps - #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi8(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi8 - #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi32(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi32 - #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_add_epi16(b, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu16 - #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu32 - #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi8 - #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi32 - #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu16 - #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu32 - #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_minpos_epu16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_minpos_epu16(a); - #else - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()), - a_ = simde__m128i_to_private(a); - - r_.u16[0] = UINT16_MAX; - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - if (a_.u16[i] < r_.u16[0]) { - r_.u16[0] = a_.u16[i]; - r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); - } - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_minpos_epu16 - #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - const int a_offset = imm8 & 4; - const int b_offset = (imm8 & 3) << 2; - -#if defined(simde_math_abs) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { - r_.u16[i] = - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) -# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mpsadbw_epu8 - #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mul_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // vmull_s32 upcasts instead of masking, so we downcast. 
- int32x2_t a_lo = vmovn_s64(a_.neon_i64); - int32x2_t b_lo = vmovn_s64(b_.neon_i64); - r_.neon_i64 = vmull_s32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make( - wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), - wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = - HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * - HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mul_epi32 - #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mullo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mullo_epi32 - #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 * b_.u32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] * b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_packus_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i max = _mm_set1_epi32(UINT16_MAX); - const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); - const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); - return - _mm_packs_epi32( - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) - ); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); - #else - r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = - 
vcombine_u16( - vqmovun_s32(a_.neon_i32), - vqmovun_s32(b_.neon_i32) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - - v &= ~(v >> 31); - v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_packus_epi32 - #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyint) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f64[0] = simde_math_nearbyint(b_.f64[0]); - break; - #endif - - #if defined(simde_math_floor) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f64[0] = simde_math_floor(b_.f64[0]); - break; - #endif - - #if defined(simde_math_ceil) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f64[0] = simde_math_ceil(b_.f64[0]); - break; - #endif - - #if defined(simde_math_trunc) - case SIMDE_MM_FROUND_TO_ZERO: - r_.f64[0] = simde_math_trunc(b_.f64[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) -# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_sd - #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128_private - r_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyintf) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_floorf) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f32[0] = simde_math_floorf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_ceilf) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f32[0] = simde_math_ceilf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_truncf) - case SIMDE_MM_FROUND_TO_ZERO: - 
r_.f32[0] = simde_math_truncf(b_.f32[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_ss - #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_load) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - return __builtin_nontemporal_load(mem_addr); - #else - return simde_mm_load_si128(mem_addr); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_stream_load_si128 - #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_ones (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_ones(a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; - #else - int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(&:r_) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r_ &= a_.i32f[i]; - } - - r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_ones - #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_zeros(a, mask); - #else - simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; - #else - int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(|:r_) - for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { - r_ |= tmp_.i32f[i]; - } - - r = !r_; - #endif - - return r; - #endif -} 
-#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_zeros - #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_mix_ones_zeros(a, mask); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); - int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); - return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); - long long c0 = wasm_i64x2_extract_lane(m, 0); - long long c1 = wasm_i64x2_extract_lane(m, 1); - long long ones = c0 | c1; - long long zeros = ~(c0 & c1); - return ones && zeros; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) - if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) - return 1; - - return 0; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_mix_ones_zeros - #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #else - int_fast32_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= ~a_.i32f[i] & b_.i32f[i]; - } - - return HEDLEY_STATIC_CAST(int, !r); - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_si128 - #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testnzc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); - int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !( !(vgetq_lane_s64(s641, 0) || vgetq_lane_s64(s641, 1)) \ - || !(vgetq_lane_s64(s640, 0) || vgetq_lane_s64(s640, 1)) ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ - && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) - return 1; - } - - return 0; - #endif - #endif -} -#if 
defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_si128 - #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testz_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #elif defined(SIMDE_HAVE_INT128_) - if ((a_.u128[0] & b_.u128[0]) == 0) { - return 1; - } - return 0; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if ((a_.u64[i] & b_.u64[i]) > 0) - return 0; - } - #endif - - return 1; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_si128 - #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_1_H) */ -/* :: End simde/x86/sse4.1.h :: */ - -#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS - #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS - #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS - #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS - #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY - #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES - #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH - #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED - #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY - #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY - #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT - #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT - #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK - #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK -#else - #define SIMDE_SIDD_UBYTE_OPS 0x00 - #define SIMDE_SIDD_UWORD_OPS 0x01 - #define SIMDE_SIDD_SBYTE_OPS 0x02 - #define SIMDE_SIDD_SWORD_OPS 0x03 - #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 - #define SIMDE_SIDD_CMP_RANGES 0x04 - #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 - #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c - #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 - #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 - #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 - #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 - #define SIMDE_SIDD_BIT_MASK 0x00 - #define SIMDE_SIDD_UNIT_MASK 0x40 -#endif - -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) - #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS - #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS - #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS - #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS - #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY - #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES - #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH - #define 
_SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED - #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY - #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY - #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY - #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY - #define _SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT - #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT - #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK - #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ - _mm_cmpestrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrs - #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 
16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ - _mm_cmpestrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrz - #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_cmpgt_epi64(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://stackoverflow.com/a/65175746/501126 */ - __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); - r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); - return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://stackoverflow.com/a/65223269/501126 */ - r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpgt_epi64 - #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_8_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 8) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i8[i]) - a_invalid = 1; - } - return a_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_16_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 16) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i16[i]) - a_invalid = 1; - } - return a_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrs(a, b, imm8) \ - _mm_cmpistrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrs(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? 
simde_mm_cmpistrs_16_((a)) \ - : simde_mm_cmpistrs_8_((a))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrs - #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_8_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 8) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i8[i]) - b_invalid = 1; - } - return b_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_16_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 16) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i16[i]) - b_invalid = 1; - } - return b_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrz(a, b, imm8) \ - _mm_cmpistrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrz(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? simde_mm_cmpistrz_16_((b)) \ - : simde_mm_cmpistrz_8_((b))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrz - #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u8(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cb(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc ^= v; - for(int bit = 0 ; bit < 8 ; bit++) { - if (crc & 1) - crc = (crc >> 1) ^ UINT32_C(0x82f63b78); - else - crc = (crc >> 1); - } - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u16(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32ch(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u8(crc, v & 0xff); - crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u32(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cw(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u16(crc, v & 0xffff); - crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) - return _mm_crc32_u64(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return 
__crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); - #else - uint64_t crc = prevcrc; - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_2_H) */ -/* :: End simde/x86/sse4.2.h :: */ diff --git a/src/simde/x86/ssse3.h b/src/simde/x86/ssse3.h deleted file mode 100644 index e9b76150f..000000000 --- a/src/simde/x86/ssse3.h +++ /dev/null @@ -1,25393 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/ssse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_SSSE3_H) -#define SIMDE_X86_SSSE3_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse3.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
_Pragma("noinline") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) -# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_NEVER_INLINE _Pragma("inline=never") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) -# define HEDLEY_NEVER_INLINE __attribute((noinline)) -#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) -# define HEDLEY_NEVER_INLINE __declspec(noinline) -#else -# define HEDLEY_NEVER_INLINE -#endif - -#if defined(HEDLEY_PRIVATE) -# undef HEDLEY_PRIVATE -#endif -#if defined(HEDLEY_PUBLIC) -# undef HEDLEY_PUBLIC -#endif -#if defined(HEDLEY_IMPORT) -# undef HEDLEY_IMPORT -#endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC __declspec(dllexport) -# define HEDLEY_IMPORT __declspec(dllimport) -#else -# if \ - HEDLEY_HAS_ATTRIBUTE(visibility) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - ( \ - defined(__TI_EABI__) && \ - ( \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ - ) \ - ) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) -# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) -# else -# define HEDLEY_PRIVATE -# define HEDLEY_PUBLIC -# endif -# define HEDLEY_IMPORT extern -#endif - -#if defined(HEDLEY_NO_THROW) -# undef HEDLEY_NO_THROW -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(nothrow) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_NO_THROW __attribute__((__nothrow__)) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) -# define HEDLEY_NO_THROW __declspec(nothrow) -#else -# define HEDLEY_NO_THROW -#endif - -#if defined(HEDLEY_FALL_THROUGH) -# undef HEDLEY_FALL_THROUGH -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_FALL_THROUGH -#elif \ - HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) -#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) -# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) -#elif defined(__fallthrough) /* SAL */ -# define HEDLEY_FALL_THROUGH __fallthrough -#else -# define HEDLEY_FALL_THROUGH -#endif - -#if defined(HEDLEY_RETURNS_NON_NULL) -# undef HEDLEY_RETURNS_NON_NULL -#endif -#if \ - HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) -#elif defined(_Ret_notnull_) /* SAL */ -# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ -#else -# define HEDLEY_RETURNS_NON_NULL -#endif - -#if defined(HEDLEY_ARRAY_PARAM) -# undef HEDLEY_ARRAY_PARAM -#endif -#if \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - !defined(__STDC_NO_VLA__) && \ - !defined(__cplusplus) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_ARRAY_PARAM(name) (name) -#else -# define 
HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(HEDLEY_IS_CONSTANT) -# undef HEDLEY_IS_CONSTANT -#endif -#if defined(HEDLEY_REQUIRE_CONSTEXPR) -# undef HEDLEY_REQUIRE_CONSTEXPR -#endif -/* HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_IS_CONSTEXPR_) -# undef HEDLEY_IS_CONSTEXPR_ -#endif -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -# if \ - HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) -# endif -# elif \ - ( \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(HEDLEY_SUNPRO_VERSION) && \ - !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION)) || \ - (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) -# if defined(__INTPTR_TYPE__) -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -# else -# include -# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) -# endif -# elif \ - defined(HEDLEY_GCC_VERSION) || \ - defined(HEDLEY_INTEL_VERSION) || \ - defined(HEDLEY_TINYC_VERSION) || \ - defined(HEDLEY_TI_ARMCL_VERSION) || \ - HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ - defined(HEDLEY_TI_CL2000_VERSION) || \ - defined(HEDLEY_TI_CL6X_VERSION) || \ - defined(HEDLEY_TI_CL7X_VERSION) || \ - defined(HEDLEY_TI_CLPRU_VERSION) || \ - defined(__clang__) -# define HEDLEY_IS_CONSTEXPR_(expr) ( \ - sizeof(void) != \ - sizeof(*( \ - 1 ? \ - ((void*) ((expr) * 0L) ) : \ - ((struct { char v[sizeof(void) * 2]; } *) 1) \ - ) \ - ) \ - ) -# endif -#endif -#if defined(HEDLEY_IS_CONSTEXPR_) -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) -#else -# if !defined(HEDLEY_IS_CONSTANT) -# define HEDLEY_IS_CONSTANT(expr) (0) -# endif -# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(HEDLEY_BEGIN_C_DECLS) -# undef HEDLEY_BEGIN_C_DECLS -#endif -#if defined(HEDLEY_END_C_DECLS) -# undef HEDLEY_END_C_DECLS -#endif -#if defined(HEDLEY_C_DECL) -# undef HEDLEY_C_DECL -#endif -#if defined(__cplusplus) -# define HEDLEY_BEGIN_C_DECLS extern "C" { -# define HEDLEY_END_C_DECLS } -# define HEDLEY_C_DECL extern "C" -#else -# define HEDLEY_BEGIN_C_DECLS -# define HEDLEY_END_C_DECLS -# define HEDLEY_C_DECL -#endif - -#if defined(HEDLEY_STATIC_ASSERT) -# undef HEDLEY_STATIC_ASSERT -#endif -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) -# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#else -# define HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(HEDLEY_NULL) -# undef HEDLEY_NULL -#endif -#if defined(__cplusplus) -# if __cplusplus >= 201103L -# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) -# elif defined(NULL) -# define HEDLEY_NULL NULL -# else -# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) -# endif -#elif defined(NULL) -# define HEDLEY_NULL NULL -#else -# define HEDLEY_NULL ((void*) 0) -#endif - -#if defined(HEDLEY_MESSAGE) -# undef HEDLEY_MESSAGE -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_MESSAGE(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(message msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) -#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) -# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_WARNING) -# undef HEDLEY_WARNING -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define HEDLEY_WARNING(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(clang warning msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) -#else -# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_REQUIRE) -# undef HEDLEY_REQUIRE -#endif -#if defined(HEDLEY_REQUIRE_MSG) -# undef HEDLEY_REQUIRE_MSG -#endif -#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) -# if HEDLEY_HAS_WARNING("-Wgcc-compat") -# define HEDLEY_REQUIRE(expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# define HEDLEY_REQUIRE_MSG(expr,msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) -# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) -# endif -#else -# define HEDLEY_REQUIRE(expr) -# define HEDLEY_REQUIRE_MSG(expr,msg) -#endif - -#if defined(HEDLEY_FLAGS) -# undef HEDLEY_FLAGS -#endif -#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) -# define HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else -# define HEDLEY_FLAGS -#endif - -#if defined(HEDLEY_FLAGS_CAST) -# undef HEDLEY_FLAGS_CAST -#endif -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) -# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)") \ - ((T) (expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(HEDLEY_EMPTY_BASES) -# undef HEDLEY_EMPTY_BASES -#endif -#if \ - (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else -# define HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. */ - -#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) -# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) -#else -# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_BUILTIN) -# undef HEDLEY_CLANG_HAS_BUILTIN -#endif -#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) - -#if defined(HEDLEY_CLANG_HAS_FEATURE) -# undef HEDLEY_CLANG_HAS_FEATURE -#endif -#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) - -#if defined(HEDLEY_CLANG_HAS_EXTENSION) -# undef HEDLEY_CLANG_HAS_EXTENSION -#endif -#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) - -#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) -# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_WARNING) -# undef HEDLEY_CLANG_HAS_WARNING -#endif -#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ -/* :: End simde/hedley.h :: */ - -#define SIMDE_VERSION_MAJOR 0 -#define SIMDE_VERSION_MINOR 8 -#define SIMDE_VERSION_MICRO 0 -#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) -// Also update meson.build in the root directory of the repository - -#include -#include - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin 
simde/simde-detect-clang.h :: */ -/* Detect Clang Version - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -/* This file was originally part of SIMDe - * (). You're free to do with it as - * you please, but I do have a few small requests: - * - * * If you make improvements, please submit them back to SIMDe - * (at ) so others can - * benefit from them. - * * Please keep a link to SIMDe intact so people know where to submit - * improvements. - * * If you expose it publicly, please change the SIMDE_ prefix to - * something specific to your project. - * - * The version numbers clang exposes (in the ___clang_major__, - * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. - * Vendors such as Apple will define these values to their version - * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but - * __clang_major__ and __clang_minor__ are defined to 4 and 0 - * respectively, instead of 3 and 1. - * - * The solution is *usually* to use clang's feature detection macros - * () - * to determine if the feature you're interested in is available. This - * generally works well, and it should probably be the first thing you - * try. Unfortunately, it's not possible to check for everything. In - * particular, compiler bugs. - * - * This file just uses the feature checking macros to detect features - * added in specific versions of clang to identify which version of - * clang the compiler is based on. - * - * Right now it only goes back to 3.6, but I'm happy to accept patches - * to go back further. And, of course, newer versions are welcome if - * they're not already present, and if you find a way to detect a point - * release that would be great, too! - */ - -#if !defined(SIMDE_DETECT_CLANG_H) -#define SIMDE_DETECT_CLANG_H 1 - -/* Attempt to detect the upstream clang version number. I usually only - * worry about major version numbers (at least for 4.0+), but if you - * need more resolution I'm happy to accept patches that are able to - * detect minor versions as well. That said, you'll probably have a - * hard time with detection since AFAIK most minor releases don't add - * anything we can detect. Updated based on - * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 - * - would welcome patches/updates there as well. 
- */ - -#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -# if __has_attribute(unsafe_buffer_usage) // no new warnings in 17.0 -# define SIMDE_DETECT_CLANG_VERSION 170000 -# elif __has_attribute(nouwtable) // no new warnings in 16.0 -# define SIMDE_DETECT_CLANG_VERSION 160000 -# elif __has_warning("-Warray-parameter") -# define SIMDE_DETECT_CLANG_VERSION 150000 -# elif __has_warning("-Wbitwise-instead-of-logical") -# define SIMDE_DETECT_CLANG_VERSION 140000 -# elif __has_warning("-Waix-compat") -# define SIMDE_DETECT_CLANG_VERSION 130000 -# elif __has_warning("-Wformat-insufficient-args") -# define SIMDE_DETECT_CLANG_VERSION 120000 -# elif __has_warning("-Wimplicit-const-int-float-conversion") -# define SIMDE_DETECT_CLANG_VERSION 110000 -# elif __has_warning("-Wmisleading-indentation") -# define SIMDE_DETECT_CLANG_VERSION 100000 -# elif defined(__FILE_NAME__) -# define SIMDE_DETECT_CLANG_VERSION 90000 -# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) -# define SIMDE_DETECT_CLANG_VERSION 80000 -// For reasons unknown, Xcode 10.3 (Apple LLVM version 10.0.1) is apparently -// based on Clang 7, but does not support the warning we test. -// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and -// https://trac.macports.org/wiki/XcodeVersionInfo. -# elif __has_warning("-Wc++98-compat-extra-semi") || \ - (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) -# define SIMDE_DETECT_CLANG_VERSION 70000 -# elif __has_warning("-Wpragma-pack") -# define SIMDE_DETECT_CLANG_VERSION 60000 -# elif __has_warning("-Wbitfield-enum-conversion") -# define SIMDE_DETECT_CLANG_VERSION 50000 -# elif __has_attribute(diagnose_if) -# define SIMDE_DETECT_CLANG_VERSION 40000 -# elif __has_warning("-Wcomma") -# define SIMDE_DETECT_CLANG_VERSION 39000 -# elif __has_warning("-Wdouble-promotion") -# define SIMDE_DETECT_CLANG_VERSION 38000 -# elif __has_warning("-Wshift-negative-value") -# define SIMDE_DETECT_CLANG_VERSION 37000 -# elif __has_warning("-Wambiguous-ellipsis") -# define SIMDE_DETECT_CLANG_VERSION 36000 -# else -# define SIMDE_DETECT_CLANG_VERSION 1 -# endif -#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ - -/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty - * straightforward; it returns true if the compiler is a derivative - * of clang >= the specified version. - * - * Since this file is often (primarily?) useful for working around bugs - * it is also helpful to have a macro which returns true if only if the - * compiler is a version of clang *older* than the specified version to - * make it a bit easier to ifdef regions to add code for older versions, - * such as pragmas to disable a specific warning. 
*/ - -#if defined(SIMDE_DETECT_CLANG_VERSION) -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) -#else -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) -#endif - -#endif /* !defined(SIMDE_DETECT_CLANG_H) */ -/* :: End simde/simde-detect-clang.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-arch.h :: */ -/* Architecture detection - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - * Different compilers define different preprocessor macros for the - * same architecture. This is an attempt to provide a single - * interface which is usable on any compiler. - * - * In general, a macro named SIMDE_ARCH_* is defined for each - * architecture the CPU supports. When there are multiple possible - * versions, we try to define the macro to the target version. For - * example, if you want to check for i586+, you could do something - * like: - * - * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) - * ... - * #endif - * - * You could also just check that SIMDE_ARCH_X86 >= 5 without checking - * if it's defined first, but some compilers may emit a warning about - * an undefined macro being used (e.g., GCC with -Wundef). - * - * This was originally created for SIMDe - * (hence the prefix), but this - * header has no dependencies and may be used anywhere. It is - * originally based on information from - * , though it - * has been enhanced with additional information. - * - * If you improve this file, or find a bug, please file the issue at - * . If you copy this into - * your project, even if you change the prefix, please keep the links - * to SIMDe intact so others know where to report issues, submit - * enhancements, and find the latest version. 
*/ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -# if defined(__alpha_ev6__) -# define SIMDE_ARCH_ALPHA 6 -# elif defined(__alpha_ev5__) -# define SIMDE_ARCH_ALPHA 5 -# elif defined(__alpha_ev4__) -# define SIMDE_ARCH_ALPHA 4 -# else -# define SIMDE_ARCH_ALPHA 1 -# endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -# define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -# define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -# if !defined(_M_ARM64EC) -# define SIMDE_ARCH_AMD64 1000 -# endif -#endif - -/* ARM - */ -#if defined(__ARM_ARCH) -# if __ARM_ARCH > 100 -# define SIMDE_ARCH_ARM (__ARM_ARCH) -# else -# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) -# endif -#elif defined(_M_ARM) -# if _M_ARM > 100 -# define SIMDE_ARCH_ARM (_M_ARM) -# else -# define SIMDE_ARCH_ARM (_M_ARM * 100) -# endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_ARM 800 -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -# define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) -#else -# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -# define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -# elif defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -# endif -#endif -#if defined(__ARM_FEATURE_SVE) -# define SIMDE_ARCH_ARM_SVE -#endif -#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA -# define SIMDE_ARCH_ARM_FMA -#endif -#if defined(__ARM_FEATURE_CRYPTO) -# define SIMDE_ARCH_ARM_CRYPTO -#endif -#if defined(__ARM_FEATURE_QRDMX) -# define SIMDE_ARCH_ARM_QRDMX -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -# define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -# define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) -# define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -# define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -# define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -# define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -# define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -# define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -# define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -# define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -# define 
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
*/ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. 
[Deleted vendored SIMDe code, not reproduced in full: the remainder of the simde/simde-diagnostic.h section (per-compiler diagnostic-suppression macros and the SIMDE_DISABLE_UNWANTED_DIAGNOSTICS aggregate), the simde/simde-features.h section (native instruction-set detection for x86 SSE through AVX-512, ARM NEON/SVE, WASM SIMD128, POWER AltiVec, z/Arch z/Vector, MIPS and LoongArch; the SIMDE_NATURAL_VECTOR_SIZE definitions; SIMDE_ENABLE_NATIVE_ALIASES handling; and the IEEE-754 storage assumption), and the beginning of the simde/simde-math.h section (MIT license header, SLEEF detection, math.h/cmath/builtin dispatch, math constants such as SIMDE_MATH_PI and the FLT/DBL limits, floating-point classification helpers, and per-function dispatch macros from simde_math_abs through simde_math_floorf). All of this content is removed as part of dropping the bundled SIMDe headers.]
!defined(simde_math_fma) - #if SIMDE_MATH_BUILTIN_LIBM(fma) - #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fma(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fma(x, y, z) fma(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmaf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaf) - #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaf(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaf(x, y, z) fmaf(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmax) - #if SIMDE_MATH_BUILTIN_LIBM(fmax) - #define simde_math_fmax(x, y) __builtin_fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmax(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmax(x, y) fmax(x, y) - #endif -#endif - -#if !defined(simde_math_fmaxf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) - #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaxf(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaxf(x, y) fmaxf(x, y) - #endif -#endif - -#if !defined(simde_math_hypot) - #if SIMDE_MATH_BUILTIN_LIBM(hypot) - #define simde_math_hypot(y, x) __builtin_hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypot(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypot(y, x) hypot(y, x) - #endif -#endif - -#if !defined(simde_math_hypotf) - #if SIMDE_MATH_BUILTIN_LIBM(hypotf) - #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypotf(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypotf(y, x) hypotf(y, x) - #endif -#endif - -#if !defined(simde_math_log) - #if SIMDE_MATH_BUILTIN_LIBM(log) - #define simde_math_log(v) __builtin_log(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log(v) log(v) - #endif -#endif - -#if !defined(simde_math_logf) - #if SIMDE_MATH_BUILTIN_LIBM(logf) - #define simde_math_logf(v) __builtin_logf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logf(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logf(v) logf(v) - #endif -#endif - -#if !defined(simde_math_logb) - #if SIMDE_MATH_BUILTIN_LIBM(logb) - #define simde_math_logb(v) __builtin_logb(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logb(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logb(v) logb(v) - #endif -#endif - -#if !defined(simde_math_logbf) - #if SIMDE_MATH_BUILTIN_LIBM(logbf) - #define simde_math_logbf(v) __builtin_logbf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logbf(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logbf(v) logbf(v) - #endif -#endif - -#if !defined(simde_math_log1p) - #if SIMDE_MATH_BUILTIN_LIBM(log1p) - #define simde_math_log1p(v) __builtin_log1p(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log1p(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1p(v) log1p(v) - #endif -#endif - -#if !defined(simde_math_log1pf) - #if SIMDE_MATH_BUILTIN_LIBM(log1pf) - #define simde_math_log1pf(v) __builtin_log1pf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define 
simde_math_log1pf(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1pf(v) log1pf(v) - #endif -#endif - -#if !defined(simde_math_log2) - #if SIMDE_MATH_BUILTIN_LIBM(log2) - #define simde_math_log2(v) __builtin_log2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2(v) log2(v) - #endif -#endif - -#if !defined(simde_math_log2f) - #if SIMDE_MATH_BUILTIN_LIBM(log2f) - #define simde_math_log2f(v) __builtin_log2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2f(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2f(v) log2f(v) - #endif -#endif - -#if !defined(simde_math_log10) - #if SIMDE_MATH_BUILTIN_LIBM(log10) - #define simde_math_log10(v) __builtin_log10(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10(v) log10(v) - #endif -#endif - -#if !defined(simde_math_log10f) - #if SIMDE_MATH_BUILTIN_LIBM(log10f) - #define simde_math_log10f(v) __builtin_log10f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10f(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10f(v) log10f(v) - #endif -#endif - -#if !defined(simde_math_modf) - #if SIMDE_MATH_BUILTIN_LIBM(modf) - #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modf(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modf(x, iptr) modf(x, iptr) - #endif -#endif - -#if !defined(simde_math_modff) - #if SIMDE_MATH_BUILTIN_LIBM(modff) - #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modff(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modff(x, iptr) modff(x, iptr) - #endif -#endif - -#if !defined(simde_math_nearbyint) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) - #define simde_math_nearbyint(v) __builtin_nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyint(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyint(v) nearbyint(v) - #endif -#endif - -#if !defined(simde_math_nearbyintf) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) - #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyintf(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyintf(v) nearbyintf(v) - #endif -#endif - -#if !defined(simde_math_pow) - #if SIMDE_MATH_BUILTIN_LIBM(pow) - #define simde_math_pow(y, x) __builtin_pow(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_pow(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_pow(y, x) pow(y, x) - #endif -#endif - -#if !defined(simde_math_powf) - #if SIMDE_MATH_BUILTIN_LIBM(powf) - #define simde_math_powf(y, x) __builtin_powf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_powf(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_powf(y, x) powf(y, x) - #endif -#endif - -#if !defined(simde_math_rint) - #if SIMDE_MATH_BUILTIN_LIBM(rint) - #define simde_math_rint(v) __builtin_rint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rint(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rint(v) rint(v) - #endif 
-#endif - -#if !defined(simde_math_rintf) - #if SIMDE_MATH_BUILTIN_LIBM(rintf) - #define simde_math_rintf(v) __builtin_rintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rintf(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rintf(v) rintf(v) - #endif -#endif - -#if !defined(simde_math_round) - #if SIMDE_MATH_BUILTIN_LIBM(round) - #define simde_math_round(v) __builtin_round(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_round(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_round(v) round(v) - #endif -#endif - -#if !defined(simde_math_roundf) - #if SIMDE_MATH_BUILTIN_LIBM(roundf) - #define simde_math_roundf(v) __builtin_roundf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_roundf(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_roundf(v) roundf(v) - #endif -#endif - -#if !defined(simde_math_roundeven) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundeven(v) __builtin_roundeven(v) - #elif defined(simde_math_round) && defined(simde_math_fabs) - static HEDLEY_INLINE - double - simde_math_roundeven(double v) { - double rounded = simde_math_round(v); - double diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundeven simde_math_roundeven - #endif -#endif - -#if !defined(simde_math_roundevenf) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundevenf(v) __builtin_roundevenf(v) - #elif defined(simde_math_roundf) && defined(simde_math_fabsf) - static HEDLEY_INLINE - float - simde_math_roundevenf(float v) { - float rounded = simde_math_roundf(v); - float diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundevenf simde_math_roundevenf - #endif -#endif - -#if !defined(simde_math_sin) - #if SIMDE_MATH_BUILTIN_LIBM(sin) - #define simde_math_sin(v) __builtin_sin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sin(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sin(v) sin(v) - #endif -#endif - -#if !defined(simde_math_sinf) - #if SIMDE_MATH_BUILTIN_LIBM(sinf) - #define simde_math_sinf(v) __builtin_sinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinf(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinf(v) sinf(v) - #endif -#endif - -#if !defined(simde_math_sinh) - #if SIMDE_MATH_BUILTIN_LIBM(sinh) - #define simde_math_sinh(v) __builtin_sinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinh(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinh(v) sinh(v) - #endif -#endif - -#if !defined(simde_math_sinhf) - #if SIMDE_MATH_BUILTIN_LIBM(sinhf) - #define simde_math_sinhf(v) __builtin_sinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinhf(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinhf(v) sinhf(v) - #endif -#endif - -#if !defined(simde_math_sqrt) - #if SIMDE_MATH_BUILTIN_LIBM(sqrt) - #define simde_math_sqrt(v) __builtin_sqrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrt(v) 
std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrt(v) sqrt(v) - #endif -#endif - -#if !defined(simde_math_sqrtf) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) - #define simde_math_sqrtf(v) __builtin_sqrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtf(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtf(v) sqrtf(v) - #endif -#endif - -#if !defined(simde_math_sqrtl) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtl) - #define simde_math_sqrtl(v) __builtin_sqrtl(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtl(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtl(v) sqrtl(v) - #endif -#endif - -#if !defined(simde_math_tan) - #if SIMDE_MATH_BUILTIN_LIBM(tan) - #define simde_math_tan(v) __builtin_tan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tan(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tan(v) tan(v) - #endif -#endif - -#if !defined(simde_math_tanf) - #if SIMDE_MATH_BUILTIN_LIBM(tanf) - #define simde_math_tanf(v) __builtin_tanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanf(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanf(v) tanf(v) - #endif -#endif - -#if !defined(simde_math_tanh) - #if SIMDE_MATH_BUILTIN_LIBM(tanh) - #define simde_math_tanh(v) __builtin_tanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanh(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanh(v) tanh(v) - #endif -#endif - -#if !defined(simde_math_tanhf) - #if SIMDE_MATH_BUILTIN_LIBM(tanhf) - #define simde_math_tanhf(v) __builtin_tanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanhf(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanhf(v) tanhf(v) - #endif -#endif - -#if !defined(simde_math_trunc) - #if SIMDE_MATH_BUILTIN_LIBM(trunc) - #define simde_math_trunc(v) __builtin_trunc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_trunc(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_trunc(v) trunc(v) - #endif -#endif - -#if !defined(simde_math_truncf) - #if SIMDE_MATH_BUILTIN_LIBM(truncf) - #define simde_math_truncf(v) __builtin_truncf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_truncf(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_truncf(v) truncf(v) - #endif -#endif - -/*** Comparison macros (which don't raise invalid errors) ***/ - -#if defined(isunordered) - #define simde_math_isunordered(x, y) isunordered(x, y) -#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) - #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) -#else - static HEDLEY_INLINE - int simde_math_isunordered(double x, double y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunordered simde_math_isunordered - - static HEDLEY_INLINE - int simde_math_isunorderedf(float x, float y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunorderedf simde_math_isunorderedf -#endif -#if !defined(simde_math_isunorderedf) - #define simde_math_isunorderedf simde_math_isunordered -#endif - -/*** Additional functions not in libm ***/ - -#if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) - static HEDLEY_INLINE - double - simde_math_cdfnorm(double x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const double a1 = 0.254829592; - static const double a2 = -0.284496736; 
- static const double a3 = 1.421413741; - static const double a4 = -1.453152027; - static const double a5 = 1.061405429; - static const double p = 0.3275911; - - const int sign = x < 0; - x = simde_math_fabs(x) / simde_math_sqrt(2.0); - - /* A&S formula 7.1.26 */ - double t = 1.0 / (1.0 + p * x); - double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); - - return 0.5 * (1.0 + (sign ? -y : y)); - } - #define simde_math_cdfnorm simde_math_cdfnorm -#endif - -#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) - static HEDLEY_INLINE - float - simde_math_cdfnormf(float x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const float a1 = 0.254829592f; - static const float a2 = -0.284496736f; - static const float a3 = 1.421413741f; - static const float a4 = -1.453152027f; - static const float a5 = 1.061405429f; - static const float p = 0.3275911f; - - const int sign = x < 0; - x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); - - /* A&S formula 7.1.26 */ - float t = 1.0f / (1.0f + p * x); - float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); - - return 0.5f * (1.0f + (sign ? -y : y)); - } - #define simde_math_cdfnormf simde_math_cdfnormf -#endif - -#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) - /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ - static HEDLEY_INLINE - double - simde_math_cdfnorminv(double p) { - static const double a[6] = { - -3.969683028665376e+01, - 2.209460984245205e+02, - -2.759285104469687e+02, - 1.383577518672690e+02, - -3.066479806614716e+01, - 2.506628277459239e+00 - }; - - static const double b[5] = { - -5.447609879822406e+01, - 1.615858368580409e+02, - -1.556989798598866e+02, - 6.680131188771972e+01, - -1.328068155288572e+01 - }; - - static const double c[6] = { - -7.784894002430293e-03, - -3.223964580411365e-01, - -2.400758277161838e+00, - -2.549732539343734e+00, - 4.374664141464968e+00, - 2.938163982698783e+00 - }; - - static const double d[4] = { - 7.784695709041462e-03, - 3.224671290700398e-01, - 2.445134137142996e+00, - 3.754408661907416e+00 - }; - - static const double low = 0.02425; - static const double high = 0.97575; - double q, r; - - if (p < 0 || p > 1) { - return 0.0; - } else if (p == 0) { - return -SIMDE_MATH_INFINITY; - } else if (p == 1) { - return SIMDE_MATH_INFINITY; - } else if (p < low) { - q = simde_math_sqrt(-2.0 * simde_math_log(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } -} -#define simde_math_cdfnorminv simde_math_cdfnorminv -#endif - -#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_cdfnorminvf(float p) { - static const float a[6] = { - -3.969683028665376e+01f, - 2.209460984245205e+02f, - -2.759285104469687e+02f, - 1.383577518672690e+02f, - -3.066479806614716e+01f, - 
2.506628277459239e+00f - }; - static const float b[5] = { - -5.447609879822406e+01f, - 1.615858368580409e+02f, - -1.556989798598866e+02f, - 6.680131188771972e+01f, - -1.328068155288572e+01f - }; - static const float c[6] = { - -7.784894002430293e-03f, - -3.223964580411365e-01f, - -2.400758277161838e+00f, - -2.549732539343734e+00f, - 4.374664141464968e+00f, - 2.938163982698783e+00f - }; - static const float d[4] = { - 7.784695709041462e-03f, - 3.224671290700398e-01f, - 2.445134137142996e+00f, - 3.754408661907416e+00f - }; - static const float low = 0.02425f; - static const float high = 0.97575f; - float q, r; - - if (p < 0 || p > 1) { - return 0.0f; - } else if (p == 0) { - return -SIMDE_MATH_INFINITYF; - } else if (p == 1) { - return SIMDE_MATH_INFINITYF; - } else if (p < low) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5f; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } - } - #define simde_math_cdfnorminvf simde_math_cdfnorminvf -#endif - -#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfinv(double x) { - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c - * - * The original answer on SO uses a constant of 0.147, but in my - * testing 0.14829094707965850830078125 gives a lower average absolute error - * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). - * That said, if your goal is to minimize the *maximum* absolute - * error, 0.15449436008930206298828125 provides significantly better - * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); - } - #define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfinvf(float x) { - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); - } - #define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfcinv(double x) { - if(x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - static const double p[6] = { - 0.1550470003116, - 1.382719649631, - 0.690969348887, - -1.128081391617, - 0.680544246825, - -0.16444156791 - }; - static const double q[3] = { - 0.155024849822, - 1.385228141995, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - static const double p[4] = { - 0.00980456202915, - 0.363667889171, - 0.97302949837, - -0.5374947401 - }; - static const double q[3] = { - 0.00980451277802, - 0.363699971544, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } - } - - #define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfcinvf(float x) { - if(x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = { - 0.1550470003116f, - 1.382719649631f, - 0.690969348887f, - -1.128081391617f, - 0.680544246825f - -0.164441567910f - }; - static const float q[3] = { - 0.155024849822f, - 1.385228141995f, - 1.000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = { - 0.00980456202915f, - 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f - }; - static const float q[3] = { - 0.00980451277802f, - 0.36369997154400f, - 1.00000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; - } - } - - #define simde_math_erfcinvf simde_math_erfcinvf -#endif - -static HEDLEY_INLINE -double -simde_math_rad2deg(double radians) { - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE -float -simde_math_rad2degf(float radians) { - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE -double -simde_math_deg2rad(double degrees) { - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE -float -simde_math_deg2radf(float degrees) { - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE -int8_t -simde_math_adds_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); - #else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_adds_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_adds_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_adds_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_adds_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); - #else - uint8_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_adds_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); - #else - uint16_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_adds_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); - #else - uint32_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_adds_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); - #else - uint64_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -int8_t -simde_math_subs_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); - #else - uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - case 32: result = func_name(__VA_ARGS__, 32); break; \ - case 33: result = func_name(__VA_ARGS__, 33); break; \ - case 34: result = func_name(__VA_ARGS__, 34); break; \ - case 35: result = func_name(__VA_ARGS__, 35); break; \ - case 36: result = func_name(__VA_ARGS__, 36); break; \ - case 37: result = func_name(__VA_ARGS__, 37); break; \ - case 38: result = func_name(__VA_ARGS__, 38); break; \ - case 39: result = func_name(__VA_ARGS__, 39); break; \ - case 40: result = func_name(__VA_ARGS__, 40); break; \ - case 41: result = func_name(__VA_ARGS__, 41); break; \ - case 42: result = func_name(__VA_ARGS__, 42); break; \ - case 43: result = func_name(__VA_ARGS__, 43); break; \ - case 44: result = func_name(__VA_ARGS__, 44); break; \ - case 45: result = func_name(__VA_ARGS__, 45); break; \ - case 46: result = func_name(__VA_ARGS__, 46); break; \ - case 47: result = func_name(__VA_ARGS__, 47); break; \ - case 48: result = func_name(__VA_ARGS__, 48); break; \ - case 49: result = func_name(__VA_ARGS__, 49); break; \ - case 50: result = func_name(__VA_ARGS__, 50); break; \ - case 51: result = func_name(__VA_ARGS__, 51); break; \ - case 52: result = func_name(__VA_ARGS__, 52); break; \ - case 53: result = func_name(__VA_ARGS__, 53); break; \ - case 54: result = func_name(__VA_ARGS__, 54); break; \ - case 55: result = func_name(__VA_ARGS__, 55); break; \ - case 56: result = func_name(__VA_ARGS__, 56); break; \ - case 57: result = func_name(__VA_ARGS__, 57); break; \ - case 58: result = func_name(__VA_ARGS__, 58); break; \ - case 59: result = func_name(__VA_ARGS__, 59); break; \ - case 60: result = func_name(__VA_ARGS__, 60); break; \ - case 61: result = func_name(__VA_ARGS__, 61); break; \ - case 62: 
result = func_name(__VA_ARGS__, 62); break; \ - case 63: result = func_name(__VA_ARGS__, 63); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - case 32: func_name(__VA_ARGS__, 32); break; \ - case 33: func_name(__VA_ARGS__, 33); break; \ - case 34: func_name(__VA_ARGS__, 34); break; \ - case 35: func_name(__VA_ARGS__, 35); break; \ - case 36: func_name(__VA_ARGS__, 36); break; \ - case 37: func_name(__VA_ARGS__, 37); break; \ - case 38: func_name(__VA_ARGS__, 38); break; \ 
- case 39: func_name(__VA_ARGS__, 39); break; \ - case 40: func_name(__VA_ARGS__, 40); break; \ - case 41: func_name(__VA_ARGS__, 41); break; \ - case 42: func_name(__VA_ARGS__, 42); break; \ - case 43: func_name(__VA_ARGS__, 43); break; \ - case 44: func_name(__VA_ARGS__, 44); break; \ - case 45: func_name(__VA_ARGS__, 45); break; \ - case 46: func_name(__VA_ARGS__, 46); break; \ - case 47: func_name(__VA_ARGS__, 47); break; \ - case 48: func_name(__VA_ARGS__, 48); break; \ - case 49: func_name(__VA_ARGS__, 49); break; \ - case 50: func_name(__VA_ARGS__, 50); break; \ - case 51: func_name(__VA_ARGS__, 51); break; \ - case 52: func_name(__VA_ARGS__, 52); break; \ - case 53: func_name(__VA_ARGS__, 53); break; \ - case 54: func_name(__VA_ARGS__, 54); break; \ - case 55: func_name(__VA_ARGS__, 55); break; \ - case 56: func_name(__VA_ARGS__, 56); break; \ - case 57: func_name(__VA_ARGS__, 57); break; \ - case 58: func_name(__VA_ARGS__, 58); break; \ - case 59: func_name(__VA_ARGS__, 59); break; \ - case 60: func_name(__VA_ARGS__, 60); break; \ - case 61: func_name(__VA_ARGS__, 61); break; \ - case 62: func_name(__VA_ARGS__, 62); break; \ - case 63: func_name(__VA_ARGS__, 63); break; \ - default: default_case; break; \ - } \ - } while (0) - -HEDLEY_DIAGNOSTIC_POP - -#endif -/* :: End simde/simde-constify.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-align.h :: */ -/* Alignment - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - ********************************************************************** - * - * This is portability layer which should help iron out some - * differences across various compilers, as well as various verisons of - * C and C++. - * - * It was originally developed for SIMD Everywhere - * (), but since its only - * dependency is Hedley (, also CC0) - * it can easily be used in other projects, so please feel free to do - * so. - * - * If you do use this in your project, please keep a link to SIMDe in - * your code to remind you where to report any bugs and/or check for - * updated versions. - * - * # API Overview - * - * The API has several parts, and most macros have a few variations. - * There are APIs for declaring aligned fields/variables, optimization - * hints, and run-time alignment checks. - * - * Briefly, macros ending with "_TO" take numeric values and are great - * when you know the value you would like to use. Macros ending with - * "_LIKE", on the other hand, accept a type and are used when you want - * to use the alignment of a type instead of hardcoding a value. - * - * Documentation for each section of the API is inline. - * - * True to form, MSVC is the main problem and imposes several - * limitations on the effectiveness of the APIs. Detailed descriptions - * of the limitations of each macro are inline, but in general: - * - * * On C11+ or C++11+ code written using this API will work. The - * ASSUME macros may or may not generate a hint to the compiler, but - * that is only an optimization issue and will not actually cause - * failures. - * * If you're using pretty much any compiler other than MSVC, - * everything should basically work as well as in C11/C++11. 
- */ - -#if !defined(SIMDE_ALIGN_H) -#define SIMDE_ALIGN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* I know this seems a little silly, but some non-hosted compilers - * don't have stddef.h, so we try to accomodate them. */ -#if !defined(SIMDE_ALIGN_SIZE_T_) - #if defined(__SIZE_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__SIZE_T_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #else - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #endif -#endif - -#if !defined(SIMDE_ALIGN_INTPTR_T_) - #if defined(__INTPTR_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ - #elif defined(__PTRDIFF_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ - #elif defined(__PTRDIFF_T_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #else - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #endif -#endif - -#if defined(SIMDE_ALIGN_DEBUG) - #if defined(__cplusplus) - #include - #else - #include - #endif -#endif - -/* SIMDE_ALIGN_OF(Type) - * - * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or - * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. - * It isn't defined everywhere (only when the compiler has some alignof- - * like feature we can use to implement it), but it should work in most - * modern compilers, as well as C11 and C++11. - * - * If we can't find an implementation for SIMDE_ALIGN_OF then the macro - * will not be defined, so if you can handle that situation sensibly - * you may need to sprinkle some ifdefs into your code. - */ -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (0 && HEDLEY_HAS_FEATURE(c_alignof)) - #define SIMDE_ALIGN_OF(Type) _Alignof(Type) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) - #define SIMDE_ALIGN_OF(Type) alignof(Type) -#elif \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - defined(__IBM__ALIGNOF__) || \ - defined(__clang__) - #define SIMDE_ALIGN_OF(Type) __alignof__(Type) -#elif \ - HEDLEY_IAR_VERSION_CHECK(8,40,0) - #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(19,0,0) - /* Probably goes back much further, but MS takes down their old docs. - * If you can verify that this works in earlier versions please let - * me know! */ - #define SIMDE_ALIGN_OF(Type) __alignof(Type) -#endif - -/* SIMDE_ALIGN_MAXIMUM: - * - * This is the maximum alignment that the compiler supports. You can - * define the value prior to including SIMDe if necessary, but in that - * case *please* submit an issue so we can add the platform to the - * detection code. - * - * Most compilers are okay with types which are aligned beyond what - * they think is the maximum, as long as the alignment is a power - * of two. 
Older versions of MSVC is the exception, so we need to cap - * the alignment requests at values that the implementation supports. - * - * XL C/C++ will accept values larger than 16 (which is the alignment - * of an AltiVec vector), but will not reliably align to the larger - * value, so so we cap the value at 16 there. - * - * If the compiler accepts any power-of-two value within reason then - * this macro should be left undefined, and the SIMDE_ALIGN_CAP - * macro will just return the value passed to it. */ -#if !defined(SIMDE_ALIGN_MAXIMUM) - #if defined(HEDLEY_MSVC_VERSION) - #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) - // Visual studio 2017 and newer does not need a max - #else - #if defined(_M_IX86) || defined(_M_AMD64) - #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 - #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) - /* VS 2010 is really a guess based on Wikipedia; if anyone can - * test with old VS versions I'd really appreciate it. */ - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 - #else - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif - #elif defined(_M_ARM) || defined(_M_ARM64) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 - #endif - #endif - #elif defined(HEDLEY_IBM_VERSION) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif -#endif - -/* You can mostly ignore these; they're intended for internal use. - * If you do need to use them please let me know; if they fulfill - * a common use case I'll probably drop the trailing underscore - * and make them part of the public API. */ -#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) - #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 - #define SIMDE_ALIGN_64_ 32 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 - #define SIMDE_ALIGN_64_ 16 - #define SIMDE_ALIGN_32_ 16 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 - #define SIMDE_ALIGN_64_ 8 - #define SIMDE_ALIGN_32_ 8 - #define SIMDE_ALIGN_16_ 8 - #define SIMDE_ALIGN_8_ 8 - #else - #error Max alignment expected to be >= 8 - #endif -#else - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 -#endif - -/** - * SIMDE_ALIGN_CAP(Alignment) - * - * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. - */ -#if defined(SIMDE_ALIGN_MAXIMUM) - #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) -#else - #define SIMDE_ALIGN_CAP(Alignment) (Alignment) -#endif - -/* SIMDE_ALIGN_TO(Alignment) - * - * SIMDE_ALIGN_TO is used to declare types or variables. It basically - * maps to the align attribute in most compilers, the align declspec - * in MSVC, or _Alignas/alignas in C11/C++11. - * - * Example: - * - * struct i32x4 { - * SIMDE_ALIGN_TO(16) int32_t values[4]; - * } - * - * Limitations: - * - * MSVC requires that the Alignment parameter be numeric; you can't do - * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is - * unfortunate because that's really how the LIKE macros are - * implemented, and I am not aware of a way to get anything like this - * to work without using the C11/C++11 keywords. 
- * - * It also means that we can't use SIMDE_ALIGN_CAP to limit the - * alignment to the value specified, which MSVC also requires, so on - * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. - * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, - * but should be safe to use on MSVC. - * - * All this is to say that, if you want your code to work on MSVC, you - * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of - * SIMDE_ALIGN_TO(8/16/32/64). - */ -#if \ - HEDLEY_HAS_ATTRIBUTE(aligned) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) -#elif \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) - #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) - #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) - /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); - * the alignment passed to the declspec has to be an integer. */ - #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE -#endif -#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) -#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) -#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) -#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) - -/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) - * - * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's - * std::assume_aligned, or __builtin_assume_aligned. It tells the - * compiler to assume that the provided pointer is aligned to an - * `Alignment`-byte boundary. - * - * If you define SIMDE_ALIGN_DEBUG prior to including this header then - * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't - * integrate with NDEBUG in this header, but it may be a good idea to - * put something like this in your code: - * - * #if !defined(NDEBUG) - * #define SIMDE_ALIGN_DEBUG - * #endif - * #include <.../simde-align.h> - */ -#if \ - HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ - HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ - __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ - __assume_aligned(simde_assume_aligned_t_, Alignment); \ - simde_assume_aligned_t_; \ - })) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) -#else - #if defined(__cplusplus) - template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) - #else - HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) - #endif - { - HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); - return ptr; - } - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) - #else - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) - #endif -#endif - -#if !defined(SIMDE_ALIGN_DEBUG) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) -#else - #include - #if defined(__cplusplus) - template - static HEDLEY_ALWAYS_INLINE - T* - simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #else - static HEDLEY_ALWAYS_INLINE - void* - simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #endif - { - if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { - fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", - file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), - HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), - HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); - } - - return ptr; - } - - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) - #else - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) - #endif -#endif - -/* SIMDE_ALIGN_LIKE(Type) - * SIMDE_ALIGN_LIKE_#(Type) - * - * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros - * except instead of an integer they take a type; basically, it's just - * a more convenient way to do something like: - * - * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - * - * The versions with a numeric suffix will fall back 
on using a numeric - * value in the event we can't use SIMDE_ALIGN_OF(Type). This is - * mainly for MSVC, where __declspec(align()) can't handle anything - * other than hard-coded numeric values. - */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) - #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) -#else - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 -#endif - -/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) - * - * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a - * type instead of a numeric value. */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) - #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) -#endif - -/* SIMDE_ALIGN_CAST(Type, Pointer) - * - * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try - * to silence warnings that some compilers may produce if you try - * to assign to a type with increased alignment requirements. - * - * Note that it does *not* actually attempt to tell the compiler that - * the pointer is aligned like the destination should be; that's the - * job of the next macro. This macro is necessary for stupid APIs - * like _mm_loadu_si128 where the input is a __m128i* but the function - * is specifically for data which isn't necessarily aligned to - * _Alignof(__m128i). - */ -#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ - Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_r_; \ - })) -#else - #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) -#endif - -/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) - * - * This is sort of like a combination of a reinterpret_cast and a - * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell - * the compiler that the pointer is aligned like the specified type - * and casts the pointer to the specified type while suppressing any - * warnings from the compiler about casting to a type with greater - * alignment requirements. - */ -#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) - -#endif /* !defined(SIMDE_ALIGN_H) */ -/* :: End simde/simde-align.h :: */ - -/* In some situations, SIMDe has to make large performance sacrifices - * for small increases in how faithfully it reproduces an API, but - * only a relatively small number of users will actually need the API - * to be completely accurate. The SIMDE_FAST_* options can be used to - * disable these trade-offs. - * - * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or - * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to - * enable some optimizations. Using -ffast-math and/or - * -ffinite-math-only will also enable the relevant options. If you - * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) - #define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) - #if defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_NANS - #elif defined(__FINITE_MATH_ONLY__) - #if __FINITE_MATH_ONLY__ - #define SIMDE_FAST_NANS - #endif - #endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_CONVERSION_RANGE -#endif - -/* Due to differences across platforms, sometimes it can be much - * faster for us to allow spurious floating point exceptions, - * or to no generate them when we should. 
*/ -#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_EXCEPTIONS -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) - #if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ - (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) - #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") - #else - #define SIMDE_REQUIRE_CONSTANT(arg) - #endif -#else - #define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) - /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which - * starts with a double-underscore. This is a system header so we have no - * control over it, but since it's a macro it will emit a diagnostic which - * prevents compilation with -Werror. */ - #if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ - _Static_assert(expr, message); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) - #endif -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) - #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#endif - -/* Statement exprs */ -#if \ - HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ - HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) -#endif - -/* This is just a convenience macro to make it easy to call a single - * function with a specific diagnostic disabled. 
*/ -#if defined(SIMDE_STATEMENT_EXPR_) - #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ - SIMDE_STATEMENT_EXPR_(({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - diagnostic \ - (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#endif - -#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) - #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") -#endif - -#if \ - (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) -# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -# define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -# if \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SCALAR -# define SIMDE_VECTOR_SUBSCRIPT -# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -# define SIMDE_VECTOR_SUBSCRIPT -# elif \ - HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# elif HEDLEY_HAS_ATTRIBUTE(vector_size) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SUBSCRIPT -# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) -# define SIMDE_VECTOR_SCALAR -# endif -# endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) - HEDLEY_DIAGNOSTIC_PUSH - /* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -# endif -# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif - -# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#else -# define SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_SAFELEN(l) -# define SIMDE_VECTORIZE_REDUCTION(r) -# define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if defined(SIMDE_NO_INLINE) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#elif defined(SIMDE_CONSTRAINED_COMPILATION) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static -#else -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if \ - HEDLEY_HAS_ATTRIBUTE(unused) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ - -#if defined(_MSC_VER) -# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -# define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -# define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -# define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -# define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# elif defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__s390x__) || defined(__zarch__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. 
*/ -# elif defined(_WIN32) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(sun) || defined(__sun) /* Solaris */ -# include -# if defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__APPLE__) -# include -# if defined(__LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -# include -# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -# include -# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# endif -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) - #define simde_bswap64(v) _byteswap_uint64(v) -#else - SIMDE_FUNCTION_ATTRIBUTES - uint64_t - simde_bswap64(uint64_t v) { - return - ((v & (((uint64_t) 0xff) << 56)) >> 56) | - ((v & (((uint64_t) 0xff) << 48)) >> 40) | - ((v & (((uint64_t) 0xff) << 40)) >> 24) | - ((v & (((uint64_t) 0xff) << 32)) >> 8) | - ((v & (((uint64_t) 0xff) << 24)) << 8) | - ((v & (((uint64_t) 0xff) << 16)) << 24) | - ((v & (((uint64_t) 0xff) << 8)) << 40) | - ((v & (((uint64_t) 0xff) )) << 56); - } -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -# error Unknown byte order; please file a bug -#else -# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -# define simde_endian_bswap64_be(value) simde_bswap64(value) -# define simde_endian_bswap64_le(value) (value) -# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -# define simde_endian_bswap64_be(value) (value) -# define simde_endian_bswap64_le(value) simde_bswap64(value) -# endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. 
*/ - -#if !defined(SIMDE_FLOAT32_TYPE) -# define SIMDE_FLOAT32_TYPE float -# define SIMDE_FLOAT32_C(value) value##f -#else -# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -# define SIMDE_FLOAT64_TYPE double -# define SIMDE_FLOAT64_C(value) value -#else -# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if defined(SIMDE_POLY8_TYPE) -# undef SIMDE_POLY8_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY8_TYPE poly8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value)) -#else -# define SIMDE_POLY8_TYPE uint8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value)) -#endif -typedef SIMDE_POLY8_TYPE simde_poly8; - -#if defined(SIMDE_POLY16_TYPE) -# undef SIMDE_POLY16_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY16_TYPE poly16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value)) -#else -# define SIMDE_POLY16_TYPE uint16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value)) -#endif -typedef SIMDE_POLY16_TYPE simde_poly16; - -#if defined(SIMDE_POLY64_TYPE) -# undef SIMDE_POLY64_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_POLY64_TYPE poly64_t -# define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull)) -#else -# define SIMDE_POLY64_TYPE uint64_t -# define SIMDE_POLY64_C(value) value ## ull -#endif -typedef SIMDE_POLY64_TYPE simde_poly64; - -#if defined(SIMDE_POLY128_TYPE) -# undef SIMDE_POLY128_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) -# define SIMDE_POLY128_TYPE poly128_t -# define SIMDE_POLY128_C(value) value -#elif defined(__SIZEOF_INT128__) -# define SIMDE_POLY128_TYPE __int128 -# define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value)) -#else -# define SIMDE_POLY128_TYPE uint64_t -# define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1 -#endif -typedef SIMDE_POLY128_TYPE simde_poly128; - -#if defined(__cplusplus) - typedef bool simde_bool; -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - typedef _Bool simde_bool; -#elif defined(bool) - typedef bool simde_bool; -#else - #include - typedef bool simde_bool; -#endif - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -# define SIMDE_CONVERT_FTOI(T,v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) \ - HEDLEY_DIAGNOSTIC_POP -#else -# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) -#else - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -# define 
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -# define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -# define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -# if \ - defined(HEDLEY_PGI_VERSION) || \ - defined(HEDLEY_MSVC_VERSION) -# define SIMDE_STDC_HOSTED 1 -# else -# define SIMDE_STDC_HOSTED 0 -# endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) - #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) - #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) - #endif -#endif -#if !defined(simde_memset) - #if HEDLEY_HAS_BUILTIN(__builtin_memset) - #define simde_memset(s, c, n) __builtin_memset(s, c, n) - #endif -#endif -#if !defined(simde_memcmp) - #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) - #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) - #endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) - #if !defined(SIMDE_NO_STRING_H) - #if defined(__has_include) - #if !__has_include() - #define SIMDE_NO_STRING_H - #endif - #elif (SIMDE_STDC_HOSTED == 0) - #define SIMDE_NO_STRING_H - #endif - #endif - - #if !defined(SIMDE_NO_STRING_H) - #include - #if !defined(simde_memcpy) - #define simde_memcpy(dest, src, n) memcpy(dest, src, n) - #endif - #if !defined(simde_memset) - #define simde_memset(s, c, n) memset(s, c, n) - #endif - #if !defined(simde_memcmp) - #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) - #endif - #else - /* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. 
*/ - #if !defined(simde_memcpy) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memcpy_(void* dest, const void* src, size_t len) { - char* dest_ = HEDLEY_STATIC_CAST(char*, dest); - char* src_ = HEDLEY_STATIC_CAST(const char*, src); - for (size_t i = 0 ; i < len ; i++) { - dest_[i] = src_[i]; - } - } - #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) - #endif - - #if !defined(simde_memset) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memset_(void* s, int c, size_t len) { - char* s_ = HEDLEY_STATIC_CAST(char*, s); - char c_ = HEDLEY_STATIC_CAST(char, c); - for (size_t i = 0 ; i < len ; i++) { - s_[i] = c_[i]; - } - } - #define simde_memset(s, c, n) simde_memset_(s, c, n) - #endif - - #if !defined(simde_memcmp) - SIMDE_FUCTION_ATTRIBUTES - int - simde_memcmp_(const void *s1, const void *s2, size_t n) { - unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); - unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); - for (size_t i = 0 ; i < len ; i++) { - if (s1_[i] != s2_[i]) { - return (int) (s1_[i] - s2_[i]); - } - } - return 0; - } - #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) - #endif - #endif -#endif - -/*** Functions that quiet a signaling NaN ***/ - -static HEDLEY_INLINE -double -simde_math_quiet(double x) { - uint64_t tmp, mask; - if (!simde_math_isnan(x)) { - return x; - } - simde_memcpy(&tmp, &x, 8); - mask = 0x7ff80000; - mask <<= 32; - tmp |= mask; - simde_memcpy(&x, &tmp, 8); - return x; -} - -static HEDLEY_INLINE -float -simde_math_quietf(float x) { - uint32_t tmp; - if (!simde_math_isnanf(x)) { - return x; - } - simde_memcpy(&tmp, &x, 4); - tmp |= 0x7fc00000lu; - simde_memcpy(&x, &tmp, 4); - return x; -} - -#if defined(FE_ALL_EXCEPT) - #define SIMDE_HAVE_FENV_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_FENV_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_FENV_H -#endif - -#if defined(EXIT_FAILURE) - #define SIMDE_HAVE_STDLIB_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_STDLIB_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_STDLIB_H -#endif - -#if defined(__has_include) -# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -# include -# elif __has_include() -# include -# endif -# if __has_include() -# include -# endif -#elif SIMDE_STDC_HOSTED == 1 -# include -# include -#endif - -#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ - static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ - T_To \ - Name (T_From value) { \ - T_To r; \ - simde_memcpy(&r, &value, sizeof(r)); \ - return r; \ - } - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/check.h :: */ -/* Check (assertions) - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -#if !defined(_WIN32) -# define SIMDE_SIZE_MODIFIER "z" -# define SIMDE_CHAR_MODIFIER "hh" -# define SIMDE_SHORT_MODIFIER "h" -#else -# if defined(_M_X64) || defined(__amd64__) -# define SIMDE_SIZE_MODIFIER "I64" -# else -# define SIMDE_SIZE_MODIFIER "" -# endif -# define SIMDE_CHAR_MODIFIER "" -# define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) -# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ -# define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -# if defined(__has_include) -# if __has_include() -# include -# endif -# elif defined(SIMDE_STDC_HOSTED) -# if SIMDE_STDC_HOSTED == 1 -# include -# endif -# elif defined(__STDC_HOSTED__) -# if __STDC_HOSTETD__ == 1 -# include -# endif -# endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/debug-trap.h :: */ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. 
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define simde_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define simde_trap() __debugbreak() -# endif -#endif -#if !defined(simde_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define simde_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define simde_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define simde_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void simde_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define simde_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define simde_trap() raise(SIGTRAP) -# else -# define simde_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -# define simde_dbg_assert(expr) do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -# define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ -/* :: End simde/debug-trap.h :: */ - - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -# if defined(EOF) -# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) -# else -# define simde_errorf(format, ...) (simde_trap()) -# endif - HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -# if defined(SIMDE_CHECK_FAIL_DEFINED) -# define simde_assert(expr) -# else -# if defined(HEDLEY_ASSUME) -# define simde_assert(expr) HEDLEY_ASSUME(expr) -# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) -# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) -# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) -# define simde_assert(expr) __assume(expr) -# else -# define simde_assert(expr) -# endif -# endif -# define simde_assert_true(expr) simde_assert(expr) -# define simde_assert_false(expr) simde_assert(!(expr)) -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) -# define simde_assert_double_equal(a, b, precision) -# define simde_assert_string_equal(a, b) -# define simde_assert_string_not_equal(a, b) -# define simde_assert_memory_equal(size, a, b) -# define simde_assert_memory_not_equal(size, a, b) -#else -# define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ - -(simde_tmp_a_ - simde_tmp_b_) : \ - (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# include -# define simde_assert_string_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ - simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_string_not_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ - simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ - simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) \ - simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) \ - simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) \ - simde_assert_type(float, "f", a, op, b) 
-#define simde_assert_double(a, op, b) \ - simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void*, "p", a, op, b) - -#define simde_assert_int8(a, op, b) \ - simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) \ - simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) \ - simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) \ - simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ -/* :: End simde/check.h :: */ - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether the operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * we use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long lonsg are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long. 
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. 
- * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - #include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; - #endif - - #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; - #endif - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; - #endif - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; - #endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde__m64_from_private(simde__m64_private v) { - simde__m64 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64_private -simde__m64_to_private(simde__m64 v) { - simde__m64_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ - SIMDE_FUNCTION_ATTRIBUTES \ - simde__##simde_type \ - simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ - simde__##simde_type##_private r_; \ - r_.isax##_##fragment = value; \ - return simde__##simde_type##_from_private(r_); \ - } \ - \ - SIMDE_FUNCTION_ATTRIBUTES \ - source_type \ - simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ - simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ - return r_.isax##_##fragment; \ - } - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) -#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) -# define _m_paddb(a, b) simde_m_paddb(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_add_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) -# define _m_paddw(a, b) simde_mm_add_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) -# define _m_paddd(a, b) simde_mm_add_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { - r_.i8[i] = INT8_MAX; - } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { - r_.i8[i] = INT8_MIN; - } else { - r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) -# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, 
b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) -# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_and_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - r_.i64[0] = a_.i64[0] & b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -# define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = 
simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) -# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) -# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) -# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) -# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) -# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) -# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtm64_si64 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtm64_si64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i64[0]; - #endif - #endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -# define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi32_si64 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[2] = { a, 0 }; - r_.neon_i32 = vld1_s32(av); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -# define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi64_m64 (int64_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtsi64_m64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); - #else - r_.i64[0] = a; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi64_si32 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_empty (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); - #else - /* noop */ - #endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_empty() simde_mm_empty() -# define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_or_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - r_.i64[0] = a_.i64[0] | b_.i64[0]; 
- #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -# define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to 
UINT8_MAX */ - const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); - - /* Elements which are within the acceptable range */ - const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); - const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); - - /* Final values as 16-bit integers */ - const int16x8_t values = vorrq_s16(le_max, gt_max); - - r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else if (a_.i16[i] < 0) { - r_.u8[i] = 0; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] > UINT8_MAX) { - r_.u8[i + 4] = UINT8_MAX; - } else if (b_.i16[i] < 0) { - r_.u8[i + 4] = 0; - } else { - r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) -# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i8 = vld1_s8(v); - #else - r_.i8[0] = e0; - r_.i8[1] = e1; - r_.i8[2] = e2; - r_.i8[3] = e3; - r_.i8[4] = e4; - r_.i8[5] = e5; - r_.i8[6] = e6; - r_.i8[7] = e7; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m64_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi8( - HEDLEY_STATIC_CAST(int8_t, e7), - HEDLEY_STATIC_CAST(int8_t, e6), - HEDLEY_STATIC_CAST(int8_t, e5), - HEDLEY_STATIC_CAST(int8_t, e4), - HEDLEY_STATIC_CAST(int8_t, e3), - HEDLEY_STATIC_CAST(int8_t, e2), - HEDLEY_STATIC_CAST(int8_t, e1), - HEDLEY_STATIC_CAST(int8_t, e0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u8 = vld1_u8(v); - #else - r_.u8[0] = e0; - r_.u8[1] = e1; - r_.u8[2] = e2; - r_.u8[3] = e3; - r_.u8[4] = e4; - r_.u8[5] = e5; - r_.u8[6] = e6; - r_.u8[7] = e7; - #endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi16(e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; - r_.neon_i16 = vld1_s16(v); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - #endif - - return 
simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi16( - HEDLEY_STATIC_CAST(int16_t, e3), - HEDLEY_STATIC_CAST(int16_t, e2), - HEDLEY_STATIC_CAST(int16_t, e1), - HEDLEY_STATIC_CAST(int16_t, e0) - ); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; - r_.neon_u16 = vld1_u16(v); -#else - r_.u16[0] = e0; - r_.u16[1] = e1; - r_.u16[2] = e2; - r_.u16[3] = e3; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32( - HEDLEY_STATIC_CAST(int32_t, e1), - HEDLEY_STATIC_CAST(int32_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; - r_.neon_u32 = vld1_u32(v); -#else - r_.u32[0] = e0; - r_.u32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi32 (int32_t e1, int32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32(e1, e0); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; - r_.neon_i32 = vld1_s32(v); -#else - r_.i32[0] = e0; - r_.i32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pi64 (int64_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; - r_.neon_i64 = vld1_s64(v); -#else - r_.i64[0] = e0; -#endif - - return simde__m64_from_private(r_); -} - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; - r_.neon_f32 = vld1_f32(v); -#else - r_.f32[0] = e0; - r_.f32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi8 (int8_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi8(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i8 = vmov_n_s8(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi8(a, a, a, a, a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi16 (int16_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi16(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i16 = vmov_n_s16(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi16(a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi32 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi32(a); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i32 = vmov_n_s32(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi16(e3, e2, e1, e0); - #else - return simde_mm_set_pi16(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi32 (int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi32(e1, e0); - #else - return simde_mm_set_pi32(e0, e1); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setzero_si64 (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setzero_si64(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_u32 = vmov_n_u32(0); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(0, 0); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_si64() simde_mm_setzero_si64() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_load_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_loadu_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(mem_addr, &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_setone_si64 (void) { - return simde_mm_set1_pi32(~INT32_C(0)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) 
- return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) -# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) -# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psllh_s(a_.mmi_i16, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) -# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi32(a, count); - #else - simde__m64_private r_; - 
simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) -# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_slli_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); - #else - r_.u64[0] = a_.u64[0] << count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) -# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 << count_.i64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] << count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) -# define _m_psllq(a, count) simde_mm_sll_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) - return simde_mm_setzero_si64(); - - r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { - r_.u16[i] = a_.u16[i] >> count_.u64[0]; - } - 
#endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) -# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { - r_.u32[i] = a_.u32[i] >> count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) -# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) -# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) -# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_si64(a, count); 
- #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> count; - #else - r_.u64[0] = a_.u64[0] >> count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) -# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 >> count_.u64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] >> count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) -# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrah_s(a_.mmi_i16, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) -# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psraw_s(a_.mmi_i32, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) 
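// ---------------------------------------------------------------------------
// Editor's aside (illustrative sketch, not simde or SDPR code): the portable
// fallbacks in the shift emulations above spell out the MMX shift semantics in
// plain C. Logical shifts (psllw/psrlw/psrld/psrlq) produce zero whenever the
// 64-bit count exceeds the element width, while the arithmetic shifts
// (psraw/psrad) behave as if the count were saturated, filling each lane with
// its sign bit. The helper names below are hypothetical.
// ---------------------------------------------------------------------------
#include <cstdint>

static inline uint16_t mmx_style_srl_u16(uint16_t x, uint64_t count) {
    // Logical shift: counts above 15 zero the lane (also avoids UB in C++).
    return (count > 15) ? uint16_t(0) : uint16_t(x >> count);
}

static inline int16_t mmx_style_sra_i16(int16_t x, uint64_t count) {
    // Arithmetic shift: counts above 15 are clamped, leaving only the sign.
    unsigned c = (count > 15) ? 15u : unsigned(count);
    // x promotes to int; >> of a negative value is an arithmetic shift on
    // mainstream compilers (and is guaranteed from C++20 onward).
    return int16_t(x >> c);
}
// e.g. mmx_style_srl_u16(0x8000, 16) == 0, mmx_style_sra_i16(-32768, 16) == -1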
-# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) -# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int32_t cnt = (count_.u64[0] > 31) ? 31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) -# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) -# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); - #elif 
defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) -# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) -# define _m_psubd(a, b) simde_mm_sub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { - r_.i8[i] = INT8_MIN; - } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) -# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const int32_t x = a_.u8[i] - b_.u8[i]; - if (x < 0) { - r_.u8[i] = 0; - } else if (x > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#if 
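// ---------------------------------------------------------------------------
// Editor's aside (illustrative sketch, not simde code): the scalar fallbacks
// for psubsb/psubusb above implement saturating subtraction by testing the
// overflow cases branch by branch. The same semantics, written with a wider
// intermediate and explicit clamping; the function names are hypothetical.
// ---------------------------------------------------------------------------
#include <algorithm>
#include <cstdint>

static inline int8_t sat_sub_i8(int8_t a, int8_t b) {
    int d = int(a) - int(b);                              // cannot overflow in int
    return int8_t(std::max(int(INT8_MIN), std::min(d, int(INT8_MAX))));
}

static inline uint8_t sat_sub_u8(uint8_t a, uint8_t b) {
    int d = int(a) - int(b);                              // may go negative
    return uint8_t(std::max(0, std::min(d, int(UINT8_MAX))));
}
// e.g. sat_sub_i8(-120, 50) == -128 (INT8_MIN); sat_sub_u8(3, 10) == 0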
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) -# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { - r_.i16[i] = SHRT_MIN; - } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) -# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - const int x = a_.u16[i] - b_.u16[i]; - if (x < 0) { - r_.u16[i] = 0; - } else if (x > UINT16_MAX) { - r_.u16[i] = UINT16_MAX; - } else { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) -# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); - #else - r_.i8[0] = a_.i8[4]; - r_.i8[1] = b_.i8[4]; - r_.i8[2] = a_.i8[5]; - r_.i8[3] = b_.i8[5]; - r_.i8[4] = a_.i8[6]; - r_.i8[5] = b_.i8[6]; - r_.i8[6] = a_.i8[7]; - r_.i8[7] = b_.i8[7]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) -# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); - #else - r_.i16[0] = a_.i16[2]; - r_.i16[1] = b_.i16[2]; - r_.i16[2] = a_.i16[3]; - r_.i16[3] = b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) -# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[1]; - r_.i32[1] = b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) -# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); - #else - r_.i8[0] = a_.i8[0]; - r_.i8[1] = b_.i8[0]; - r_.i8[2] = a_.i8[1]; - r_.i8[3] = b_.i8[1]; - r_.i8[4] = a_.i8[2]; - r_.i8[5] = b_.i8[2]; - r_.i8[6] = a_.i8[3]; - r_.i8[7] = b_.i8[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) -# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = 
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); - #else - r_.i16[0] = a_.i16[0]; - r_.i16[1] = b_.i16[0]; - r_.i16[2] = a_.i16[1]; - r_.i16[3] = b_.i16[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) -# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); - #else - r_.i32[0] = a_.i32[0]; - r_.i32[1] = b_.i32[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) -# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_xor_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - r_.u64[0] = a_.u64[0] ^ b_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) -# define _m_pxor(a, b) simde_mm_xor_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_m_to_int (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _m_to_int(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _m_to_int(a) simde_m_to_int(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_MMX_H) */ -/* :: End simde/x86/mmx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-f16.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do 
so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if !defined(SIMDE_FLOAT16_H) -#define SIMDE_FLOAT16_H - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* Portable version which should work on pretty much any compiler. - * Obviously you can't rely on compiler support for things like - * conversion to/from 32-bit floats, so make sure you always use the - * functions and macros in this file! - * - * The portable implementations are (heavily) based on CC0 code by - * Fabian Giesen: (see also - * ). - * I have basically just modified it to get rid of some UB (lots of - * aliasing, right shifting a negative value), use fixed-width types, - * and work in C. */ -#define SIMDE_FLOAT16_API_PORTABLE 1 -/* _Float16, per C standard (TS 18661-3; - * ). */ -#define SIMDE_FLOAT16_API_FLOAT16 2 -/* clang >= 6.0 supports __fp16 as an interchange format on all - * targets, but only allows you to use them for arguments and return - * values on targets which have defined an ABI. We get around the - * restriction by wrapping the __fp16 in a struct, but we can't do - * that on Arm since it would break compatibility with the NEON F16 - * functions. */ -#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 -/* This is basically __fp16 as specified by Arm, where arugments and - * return values are raw __fp16 values not structs. */ -#define SIMDE_FLOAT16_API_FP16 4 - -/* Choosing an implementation. This is a bit rough, but I don't have - * any ideas on how to improve it. If you do, patches are definitely - * welcome. */ -#if !defined(SIMDE_FLOAT16_API) - #if defined(__ARM_FP16_FORMAT_IEEE) && (defined(SIMDE_ARM_NEON_FP16) || defined(__ARM_FP16_ARGS)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 - #elif !defined(__EMSCRIPTEN__) && !(defined(__clang__) && defined(SIMDE_ARCH_POWER)) && \ - !(defined(HEDLEY_MSVC_VERSION) && defined(__clang__)) && \ - !(defined(SIMDE_ARCH_MIPS) && defined(__clang__)) && \ - !(defined(__clang__) && defined(SIMDE_ARCH_RISCV64)) && ( \ - defined(SIMDE_X86_AVX512FP16_NATIVE) || \ - (defined(SIMDE_ARCH_X86_SSE2) && HEDLEY_GCC_VERSION_CHECK(12,0,0)) || \ - (defined(SIMDE_ARCH_AARCH64) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !defined(__cplusplus)) || \ - ((defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0)) || \ - (!(defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(6,0,0))) - /* We haven't found a better way to detect this. 
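// ---------------------------------------------------------------------------
// Editor's aside (illustrative sketch, not simde code): the four float16 APIs
// described above are different carriers for the same IEEE 754 binary16 bit
// pattern -- 1 sign bit, 5 exponent bits (bias 15), 10 mantissa bits -- with
// the portable API simply storing those bits in a uint16_t. A few reference
// encodings:
//   0x3C00 -> 1.0     0x4000 -> 2.0     0x3800 -> 0.5
//   0x7C00 -> +inf    0xFC00 -> -inf    0x7E00 -> quiet NaN (cf. SIMDE_NANHF)
// The helper names below are hypothetical.
// ---------------------------------------------------------------------------
#include <cstdint>

struct portable_f16 { uint16_t bits; };   // same idea as the portable struct

static inline unsigned f16_sign(portable_f16 h)     { return (h.bits >> 15) & 0x1u;  }
static inline unsigned f16_exponent(portable_f16 h) { return (h.bits >> 10) & 0x1Fu; } // biased
static inline unsigned f16_mantissa(portable_f16 h) { return h.bits & 0x3FFu; }

static inline bool f16_is_inf(portable_f16 h) {
    return f16_exponent(h) == 0x1Fu && f16_mantissa(h) == 0u;
}
static inline bool f16_is_nan(portable_f16 h) {
    return f16_exponent(h) == 0x1Fu && f16_mantissa(h) != 0u;
}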
It seems like defining - * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then - * checking for defined(FLT16_MAX) should work, but both gcc and - * clang will define the constants even if _Float16 is not - * supported. Ideas welcome. */ - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 - #elif defined(__FLT16_MIN__) && \ - (defined(__clang__) && \ - (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) \ - && !defined(SIMDE_ARCH_RISCV64)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI - #else - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE - #endif -#endif - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 - typedef _Float16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #if !defined(__cplusplus) - #define SIMDE_FLOAT16_C(value) value##f16 - #else - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(_Float16, (value)) - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - typedef struct { __fp16 value; } simde_float16; - #if defined(SIMDE_STATEMENT_EXPR_) && !defined(SIMDE_TESTS_H) - #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) - #else - #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) - #define SIMDE_FLOAT16_IS_SCALAR 1 - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 - typedef __fp16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - typedef struct { uint16_t value; } simde_float16; -#else - #error No 16-bit floating point API. -#endif - -#if \ - defined(SIMDE_VECTOR_OPS) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) - #define SIMDE_FLOAT16_VECTOR -#endif - -/* Reinterpret -- you *generally* shouldn't need these, they're really - * intended for internal use. However, on x86 half-precision floats - * get stuffed into a __m128i/__m256i, so it may be useful. 
*/ - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - #define SIMDE_NANHF simde_uint16_as_float16(0x7E00) // a quiet Not-a-Number - #define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) - #define SIMDE_NINFINITYHF simde_uint16_as_float16(0xFC00) -#else - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF SIMDE_FLOAT16_C(__builtin_nanf16("")) - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_FLOAT16_C(SIMDE_MATH_NAN) - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(__builtin_inf16()) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-__builtin_inf16()) - #else - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-SIMDE_MATH_INFINITY) - #endif - #else - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF __builtin_nanf16("") - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_MATH_NAN - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF __builtin_inf16() - #define SIMDE_NINFINITYHF -(__builtin_inf16()) - #else - #define SIMDE_INFINITYHF HEDLEY_STATIC_CAST(simde_float16, SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF HEDLEY_STATIC_CAST(simde_float16, -SIMDE_MATH_INFINITY) - #endif - #endif -#endif - -/* Conversion -- convert between single-precision and half-precision - * floats. */ -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float16 -simde_float16_from_float32 (simde_float32 value) { - simde_float16 res; - - #if \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) - res = HEDLEY_STATIC_CAST(simde_float16, value); - #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) - res.value = HEDLEY_STATIC_CAST(__fp16, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint32_t f32u = simde_float32_as_uint32(value); - static const uint32_t f32u_infty = UINT32_C(255) << 23; - static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; - static const uint32_t denorm_magic = - ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; - uint16_t f16u; - - uint32_t sign = f32u & (UINT32_C(1) << 31); - f32u ^= sign; - - /* NOTE all the integer compares in this function cast the operands - * to signed values to help compilers vectorize to SSE2, which lacks - * unsigned comparison instructions. This is fine since all - * operands are below 0x80000000 (we clear the sign bit). */ - - if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ - f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ - } else { /* (De)normalized number or zero */ - if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ - /* use a magic value to align our 10 mantissa bits at the bottom of - * the float. as long as FP addition is round-to-nearest-even this - * just works. */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); - - /* and one integer subtract of the bias later, we have our final float! 
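// ---------------------------------------------------------------------------
// Editor's aside (illustrative sketch, not simde code): the conversion above
// relies on Fabian Giesen's branch-light bit tricks (the "denorm magic" and
// rounding-bias additions). A plainer sketch of the same round-to-nearest-even
// idea, restricted to normal, in-range magnitudes (no subnormal, overflow, or
// NaN handling); the function name is hypothetical.
// ---------------------------------------------------------------------------
#include <cstdint>
#include <cstring>

static inline uint16_t f32_to_f16_bits_normal_only(float f) {
    uint32_t u;
    std::memcpy(&u, &f, sizeof u);                     // well-defined type pun
    uint32_t sign = (u >> 16) & 0x8000u;               // move sign to bit 15
    int      exp  = int((u >> 23) & 0xFFu) - 127 + 15; // rebias 8-bit -> 5-bit
    uint32_t mant = u & 0x7FFFFFu;
    uint32_t half = (uint32_t(exp) << 10) | (mant >> 13); // keep top 10 mantissa bits
    uint32_t rest = mant & 0x1FFFu;                        // the 13 dropped bits
    if (rest > 0x1000u || (rest == 0x1000u && (half & 1u)))
        half++;                                            // round to nearest, ties to even
    return uint16_t(sign | half);
}
// e.g. f32_to_f16_bits_normal_only(1.0f) == 0x3C00, (1.5f) == 0x3E00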
*/ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); - } else { - uint32_t mant_odd = (f32u >> 13) & 1; - - /* update exponent, rounding bias part 1 */ - f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); - /* rounding bias part 2 */ - f32u += mant_odd; - /* take the bits! */ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); - } - } - - f16u |= sign >> 16; - res = simde_uint16_as_float16(f16u); - #endif - - return res; -} - -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float32 -simde_float16_to_float32 (simde_float16 value) { - simde_float32 res; - - #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) - res = HEDLEY_STATIC_CAST(simde_float32, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint16_t half = simde_float16_as_uint16(value); - const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); - const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ - uint32_t f32u; - - f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ - uint32_t exp = shifted_exp & f32u; /* just the exponent */ - f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ - - /* handle exponent special cases */ - if (exp == shifted_exp) /* Inf/NaN? */ - f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ - else if (exp == 0) { /* Zero/Denormal? */ - f32u += (1) << 23; /* extra exp adjust */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ - } - - f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ - res = simde_uint32_as_float32(f32u); - #endif - - return res; -} - -#ifdef SIMDE_FLOAT16_C - #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) -#else - #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) -#endif - -#if !defined(simde_isinfhf) && defined(simde_math_isinff) - #define simde_isinfhf(a) simde_math_isinff(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnanhf) && defined(simde_math_isnanf) - #define simde_isnanhf(a) simde_math_isnanf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnormalhf) && defined(simde_math_isnormalf) - #define simde_isnormalhf(a) simde_math_isnormalf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_issubnormalhf) && defined(simde_math_issubnormalf) - #define simde_issubnormalhf(a) simde_math_issubnormalf(simde_float16_to_float32(a)) -#endif - -#define simde_fpclassifyhf(a) simde_math_fpclassifyf(simde_float16_to_float32(a)) - -static HEDLEY_INLINE -uint8_t -simde_fpclasshf(simde_float16 v, const int imm8) { - uint16_t bits = simde_float16_as_uint16(v); - uint8_t negative = (bits >> 15) & 1; - uint16_t const ExpMask = 0x7C00; // [14:10] - uint16_t const MantMask = 0x03FF; // [9:0] - uint8_t exponent_all_ones = ((bits & ExpMask) == ExpMask); - uint8_t exponent_all_zeros = ((bits & ExpMask) == 0); - uint8_t mantissa_all_zeros = ((bits & MantMask) == 0); - uint8_t zero = exponent_all_zeros & mantissa_all_zeros; - uint8_t signaling_bit = (bits >> 9) & 1; - - uint8_t result = 0; - uint8_t snan = exponent_all_ones & (!mantissa_all_zeros) & (!signaling_bit); - uint8_t qnan = exponent_all_ones & (!mantissa_all_zeros) & signaling_bit; - uint8_t positive_zero = (!negative) & zero; - uint8_t negative_zero = negative & zero; - uint8_t positive_infinity = (!negative) & exponent_all_ones & mantissa_all_zeros; - uint8_t negative_infinity = negative & exponent_all_ones & mantissa_all_zeros; - uint8_t 
denormal = exponent_all_zeros & (!mantissa_all_zeros); - uint8_t finite_negative = negative & (!exponent_all_ones) & (!zero); - result = (((imm8 >> 0) & qnan) | \ - ((imm8 >> 1) & positive_zero) | \ - ((imm8 >> 2) & negative_zero) | \ - ((imm8 >> 3) & positive_infinity) | \ - ((imm8 >> 4) & negative_infinity) | \ - ((imm8 >> 5) & denormal) | \ - ((imm8 >> 6) & finite_negative) | \ - ((imm8 >> 7) & snan)); - return result; -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_FLOAT16_H) */ -/* :: End simde/simde-f16.h :: */ - -#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) - #define NOMINMAX - #include -#endif - -#if defined(__ARM_ACLE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_ALIGN_TO_16 __m128 n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v16i8 lsx_i8; - v8i16 lsx_i16; - v4i32 lsx_i32; - v2i64 lsx_i64; - v16u8 lsx_u8; - v8u16 lsx_u16; - v4u32 lsx_u32; - v2u64 lsx_u64; - v4f32 lsx_f32; - v2f64 lsx_f64; - #endif -} simde__m128_private; - -#if defined(SIMDE_X86_SSE_NATIVE) - typedef __m128 simde__m128; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef float32x4_t simde__m128; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; -#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - typedef v4f32 simde__m128; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128_private simde__m128; -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - typedef simde__m128 __m128; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde__m128_from_private(simde__m128_private v) { - simde__m128 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128_private -simde__m128_to_private(simde__m128 v) { - simde__m128_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || 
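// ---------------------------------------------------------------------------
// Editor's aside (illustrative sketch, not simde code): simde__m128_private
// above is a union of differently typed views over the same 16 bytes, and
// simde__m128_from_private/_to_private convert through memcpy so the round
// trip stays within defined behaviour (no strict-aliasing violations). The
// same pattern in miniature; the type and function names are hypothetical.
// ---------------------------------------------------------------------------
#include <cstdint>
#include <cstring>

union vec128_views {               // lane views over one 16-byte register image
    float    f32[4];
    int32_t  i32[4];
    uint8_t  u8[16];
};

struct vec128 { unsigned char bytes[16]; };   // opaque "public" type

static inline vec128_views vec128_to_views(vec128 v) {
    vec128_views out;
    std::memcpy(&out, &v, sizeof out);        // defined behaviour, unlike a cast
    return out;
}

static inline vec128 vec128_from_views(vec128_views v) {
    vec128 out;
    std::memcpy(&out, &v, sizeof out);
    return out;
}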
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(float) - simde__m128_to_altivec_f32(simde__m128 value) { - simde__m128_private r_ = simde__m128_to_private(value); - return r_.altivec_f32; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { - simde__m128_private r_; - r_.altivec_f32 = value; - return simde__m128_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); -#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ - -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) -#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ - -enum { - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, - SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, - SIMDE_MM_ROUND_UP = _MM_ROUND_UP, - SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO - #else - SIMDE_MM_ROUND_NEAREST = 0x0000, - SIMDE_MM_ROUND_DOWN = 0x2000, - SIMDE_MM_ROUND_UP = 0x4000, - SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 - #endif -}; -#if defined(_MM_ROUND_MASK) -# define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK -#else -# define SIMDE_MM_ROUND_MASK (0x6000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK -#endif - -#if defined(_MM_FROUND_TO_NEAREST_INT) -# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT -# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF -# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF -# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO -# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION - -# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC -# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC -#else -# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 -# define 
SIMDE_MM_FROUND_TO_NEG_INF 0x01 -# define SIMDE_MM_FROUND_TO_POS_INF 0x02 -# define SIMDE_MM_FROUND_TO_ZERO 0x03 -# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 - -# define SIMDE_MM_FROUND_RAISE_EXC 0x00 -# define SIMDE_MM_FROUND_NO_EXC 0x08 -#endif - -#define SIMDE_MM_FROUND_NINT \ - (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_FLOOR \ - (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_CEIL \ - (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_TRUNC \ - (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_RINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_NEARBYINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) - -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) -# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT -# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF -# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF -# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO -# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION -# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC -# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT -# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR -# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL -# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC -# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT -# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT -#endif - -#if defined(_MM_EXCEPT_INVALID) -# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID -#else -# define SIMDE_MM_EXCEPT_INVALID (0x0001) -#endif -#if defined(_MM_EXCEPT_DENORM) -# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM -#else -# define SIMDE_MM_EXCEPT_DENORM (0x0002) -#endif -#if defined(_MM_EXCEPT_DIV_ZERO) -# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO -#else -# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) -#endif -#if defined(_MM_EXCEPT_OVERFLOW) -# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW -#else -# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) -#endif -#if defined(_MM_EXCEPT_UNDERFLOW) -# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW -#else -# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) -#endif -#if defined(_MM_EXCEPT_INEXACT) -# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT -#else -# define SIMDE_MM_EXCEPT_INEXACT (0x0020) -#endif -#if defined(_MM_EXCEPT_MASK) -# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK -#else -# define SIMDE_MM_EXCEPT_MASK \ - (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ - SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ - SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID - #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM - #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO - #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW - #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW - #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT - #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK -#endif - -#if defined(_MM_MASK_INVALID) -# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID -#else -# define SIMDE_MM_MASK_INVALID (0x0080) -#endif -#if defined(_MM_MASK_DENORM) -# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM -#else -# define SIMDE_MM_MASK_DENORM (0x0100) -#endif -#if defined(_MM_MASK_DIV_ZERO) -# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO -#else -# define 
SIMDE_MM_MASK_DIV_ZERO (0x0200) -#endif -#if defined(_MM_MASK_OVERFLOW) -# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW -#else -# define SIMDE_MM_MASK_OVERFLOW (0x0400) -#endif -#if defined(_MM_MASK_UNDERFLOW) -# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW -#else -# define SIMDE_MM_MASK_UNDERFLOW (0x0800) -#endif -#if defined(_MM_MASK_INEXACT) -# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT -#else -# define SIMDE_MM_MASK_INEXACT (0x1000) -#endif -#if defined(_MM_MASK_MASK) -# define SIMDE_MM_MASK_MASK _MM_MASK_MASK -#else -# define SIMDE_MM_MASK_MASK \ - (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ - SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ - SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID - #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM - #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO - #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW - #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW - #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT - #define _MM_MASK_MASK SIMDE_MM_MASK_MASK -#endif - -#if defined(_MM_FLUSH_ZERO_MASK) -# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK -#else -# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_ON) -# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON -#else -# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_OFF) -# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF -#else -# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK - #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON - #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_ROUNDING_MODE(void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _MM_GET_ROUNDING_MODE(); - #elif defined(SIMDE_HAVE_FENV_H) - unsigned int vfe_mode; - - switch (fegetround()) { - #if defined(FE_TONEAREST) - case FE_TONEAREST: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case FE_TOWARDZERO: - vfe_mode = SIMDE_MM_ROUND_DOWN; - break; - #endif - - #if defined(FE_UPWARD) - case FE_UPWARD: - vfe_mode = SIMDE_MM_ROUND_UP; - break; - #endif - - #if defined(FE_DOWNWARD) - case FE_DOWNWARD: - vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; - break; - #endif - - default: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - } - - return vfe_mode; - #else - return SIMDE_MM_ROUND_NEAREST; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_ROUNDING_MODE(a); - #elif defined(SIMDE_HAVE_FENV_H) - int fe_mode = FE_TONEAREST; - - switch (a) { - #if defined(FE_TONEAREST) - case SIMDE_MM_ROUND_NEAREST: - fe_mode = FE_TONEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case SIMDE_MM_ROUND_TOWARD_ZERO: - fe_mode = FE_TOWARDZERO; - break; - #endif - - #if defined(FE_DOWNWARD) - case SIMDE_MM_ROUND_DOWN: - fe_mode = FE_DOWNWARD; - break; - #endif - - #if defined(FE_UPWARD) - case SIMDE_MM_ROUND_UP: - fe_mode = FE_UPWARD; - break; - #endif - - default: - return; - } - - fesetround(fe_mode); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) -#endif - 
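// ---------------------------------------------------------------------------
// Editor's aside (illustrative sketch, not simde code): when SSE is absent,
// the rounding-mode emulation above falls back to the C floating-point
// environment. The conventional correspondence between the MXCSR rounding
// field and <cfenv> is
//   _MM_ROUND_NEAREST (0x0000)      <-> FE_TONEAREST
//   _MM_ROUND_DOWN (0x2000)         <-> FE_DOWNWARD
//   _MM_ROUND_UP (0x4000)           <-> FE_UPWARD
//   _MM_ROUND_TOWARD_ZERO (0x6000)  <-> FE_TOWARDZERO
// A minimal demonstration of how the active mode changes rint():
// ---------------------------------------------------------------------------
#include <cfenv>
#include <cmath>
#include <cstdio>

// (Strictly, #pragma STDC FENV_ACCESS ON is required for fully defined
// behaviour when changing the rounding mode at run time.)
int main() {
    std::fesetround(FE_TONEAREST);
    std::printf("%.1f\n", std::rint(2.7));   // 3.0 -- round to nearest
    std::fesetround(FE_DOWNWARD);
    std::printf("%.1f\n", std::rint(2.7));   // 2.0 -- round toward -inf
    std::fesetround(FE_TONEAREST);           // restore the default mode
    return 0;
}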
-SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. 
*/ - #if \ - defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_f32 = vrndiq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndnq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndmq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); - #elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndpq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); - #elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndq_f32(a_.neon_f32); 
- #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); - #elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) -#else - #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps(e3, e2, e1, e0); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; - r_.neon_f32 = vld1q_f32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps1 (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps1(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - (void) a; - return vec_splats(a); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - return (simde__m128)__lsx_vldrepl_w(&a, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_splat(a); - #else - return simde_mm_set_ps(a, a, a, a); - #endif -} -#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps1(a) simde_mm_set_ps1(a) -# define _mm_set1_ps(a) simde_mm_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_move_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_move_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; - r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); - #else - r_.f32[0] = b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if 
 defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_broadcastlow_ps(simde__m128 a) { - /* This function broadcasts the first element in the input vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_ss functions since there may be garbage in the upper lanes. */ - - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_shuffle_ps(a, a, 0); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - // the upper values in the result must be the remnants of <a>. 
- r_.neon_f32 = vaddq_f32(a_.neon_f32, value); - #else - r_.f32[0] = a_.f32[0] + b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_and_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_and_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_andnot_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_xor_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_xor_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) -#endif - 
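A minimal scalar sketch (not SIMDe code; every *_sketch name below is made up for illustration) of the *_ss fallback pattern used in the removed code above: run the operation across all four lanes, broadcast lane 0 of both inputs first so the discarded upper lanes never operate on garbage values (the exception-safety point made in the broadcastlow comment), then merge lane 0 of the result back into a with a move_ss-style blend.

#include <cstdio>

struct vec4 { float f[4]; };

// Full-width add: every lane participates.
static vec4 add_ps_sketch(vec4 a, vec4 b) {
  vec4 r;
  for (int i = 0; i < 4; i++) r.f[i] = a.f[i] + b.f[i];
  return r;
}

// Broadcast lane 0 to all lanes so the upper lanes never hold garbage operands.
static vec4 broadcastlow_sketch(vec4 a) {
  vec4 r;
  for (int i = 0; i < 4; i++) r.f[i] = a.f[0];
  return r;
}

// Keep lane 0 of b and lanes 1..3 of a (what a move_ss-style blend does).
static vec4 move_ss_sketch(vec4 a, vec4 b) {
  vec4 r = a;
  r.f[0] = b.f[0];
  return r;
}

// add_ss composed from the pieces above, mirroring the fallback path.
static vec4 add_ss_sketch(vec4 a, vec4 b) {
  return move_ss_sketch(a, add_ps_sketch(broadcastlow_sketch(a), broadcastlow_sketch(b)));
}

int main(void) {
  vec4 a = { {1.0f, 2.0f, 3.0f, 4.0f} };
  vec4 b = { {10.0f, 20.0f, 30.0f, 40.0f} };
  vec4 r = add_ss_sketch(a, b);
  std::printf("%g %g %g %g\n", r.f[0], r.f[1], r.f[2], r.f[3]); // prints: 11 2 3 4
  return 0;
}

Only lane 0 carries the sum; lanes 1..3 pass through from a untouched, which is exactly the contract of the scalar-lane SSE operations being emulated.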
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_or_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_or_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_not_ps(simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* Note: we use ints instead of floats because we don't want cmpeq - * to return false for (NaN, NaN) */ - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - mask_ = simde__m128_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint32_t wa SIMDE_VECTOR(16); - uint32_t wb SIMDE_VECTOR(16); - uint32_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) -# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint16_t wa SIMDE_VECTOR(16); - uint16_t wb SIMDE_VECTOR(16); - uint16_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) -# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_abs_ps(simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - simde_float32 mask_; - uint32_t u32_ = UINT32_C(0x7FFFFFFF); - simde_memcpy(&mask_, &u32_, sizeof(u32_)); - return _mm_and_ps(_mm_set1_ps(mask_), a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vabsq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_abs(a_.altivec_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpge_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpge_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpgt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpgt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmplt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmple_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpgt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { - return simde_mm_cmpge_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vandq_u32(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpunord_ps(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpunord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] == b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] >= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] > b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] <= b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] < b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #else - return a_.f32[0] != b_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { - simde__m128_private - r_, - dest_ = simde__m128_to_private(dest), - src_ = simde__m128_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); - r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t sign_pos = wasm_f32x4_splat(-0.0f); - r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); - #else - r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); - r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; - r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); - #elif defined(SIMDE_IEEE754_STORAGE) - (void) src_; - (void) dest_; - simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); - r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { - return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_pi2ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) - a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); - #else - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_si2ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - r_.i32[1] = a_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvt_ss2si (simde__m128 a) { - #if 
defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_ss2si(a); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); - #else - simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.i16[i]; - r_.f32[i] = v; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; - #else - r_.f32[0] = (simde_float32) b_.i32[0]; - r_.f32[1] = (simde_float32) b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32x2_ps(a, b); - #else - simde__m128_private r_; - simde__m64_private - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); - SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); - #else - r_.f32[0] = (simde_float32) a_.i32[0]; - r_.f32[1] = (simde_float32) a_.i32[1]; - r_.f32[2] = (simde_float32) b_.i32[0]; - r_.f32[3] = (simde_float32) b_.i32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpi8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return 
_mm_cvtpi8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); - r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); - r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi16 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi16(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi32(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtps_pi8 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi8(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) - /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to - * i16, combine with an all-zero vector of i16 (which will become the upper - * half), narrow to i8. 
*/ - float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); - float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); - float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); - r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) - r_.i8[i] = INT8_MAX; - else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) - r_.i8[i] = INT8_MIN; - else - r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); - } - /* Note: the upper half is undefined */ - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu16_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu16_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.u16[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpu8_ps (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu8_ps(a); - #else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtsi32_ss(a, b); - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_ss(a, b); - #else - return _mm_cvtsi64x_ss(a, b); - #endif - #else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); - #else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - #endif - - 
return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm_cvtss_f32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtss_f32(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_f32(a_.neon_f32, 0); - #else - return a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtss_si32 (simde__m128 a) { - return simde_mm_cvt_ss2si(a); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtss_si64(a); - #else - return _mm_cvtss_si64x(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); - #else - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtt_ps2pi (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtt_ps2pi(a); - #else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) -# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtt_ss2si (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtt_ss2si(a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - simde_float32 v = a_.f32[0]; - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #else - return SIMDE_CONVERT_FTOI(int32_t, v); - #endif - #endif - #endif -} -#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) -# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttss_si64 (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) - #if defined(__PGI) - return _mm_cvttss_si64x(a); - #else - return _mm_cvttss_si64(a); - #endif - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); - #else - return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = a_.u32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); - float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); - r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) - r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_div_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = a_.f32[0] / b_.f32[0]; - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_mm_extract_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private a_ = simde__m64_to_private(a); - return a_.i16[imm8]; -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) -#endif -#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) -# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m64_private - a_ = simde__m64_to_private(a); - - a_.i16[imm8] = i; - - return simde__m64_from_private(a_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) - #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) -#endif -#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps(mem_addr); -#else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); - #endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load1_ps (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps1(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_dup_f32(mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); - #else - r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ss (simde_float32 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ss(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); - #else - r_.f32[0] = *mem_addr; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); - #else - simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -/* The SSE documentation says that there are no alignment requirements - for mem_addr. Unfortunately they used the __m64 type for the argument - which is supposed to be 8-byte aligned, so some compilers (like clang - with -Wcast-align) will generate a warning if you try to cast, say, - a simde_float32* to a simde__m64* for this function. - - I think the choice of argument type is unfortunate, but I do think we - need to stick to it here. 
If there is demand I can always add something - like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vld1_f32( - HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); - #else - simde__m64_private b_; - simde_memcpy(&b_, mem_addr, sizeof(b_)); - r_.i32[0] = b_.i32[0]; - r_.i32[1] = b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) - #else - #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadr_ps(mem_addr); - #else - simde__m128_private - r_, - v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrev64q_f32(v_.neon_f32); - r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_reve(v_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); - #else - r_.f32[0] = v_.f32[3]; - r_.f32[1] = v_.f32[2]; - r_.f32[2] = v_.f32[1]; - r_.f32[3] = v_.f32[0]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadu_ps(mem_addr); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vld(mem_addr, 0); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m64_private - a_ = simde__m64_to_private(a), - mask_ = simde__m64_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) - if (mask_.i8[i] < 0) - mem_addr[i] = a_.i8[i]; - #endif -} -#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) -# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) - r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); - #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) - r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) -# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_max_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) -# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - #if defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); - #else - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); - #endif - #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); - r_.f32 = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.f32), - ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | - (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) - ) - ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) -# define _m_pminub(a, b) simde_mm_min_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_min_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #else - r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movehl_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vzip2q_u64(b_.neon_u64, a_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a32 = vget_high_f32(a_.neon_f32); - float32x2_t b32 = vget_high_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(b32, a32); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergel(b_.altivec_i64, a_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); - #else - r_.f32[0] = b_.f32[2]; - r_.f32[1] = b_.f32[3]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movelh_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = 
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] == b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] == b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] >= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] >= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] > b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] > b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] <= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] <= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] < b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] < b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] != b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] != b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) -# if defined(__has_builtin) -# if __has_builtin(__builtin_ia32_undef128) -# define SIMDE_HAVE_UNDEFINED128 -# endif -# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) -# define SIMDE_HAVE_UNDEFINED128 -# endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpackhi_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_high_f32(a_.neon_f32); - float32x2_t b1 = vget_high_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = 
__lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpacklo_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_low_f32(a_.neon_f32); - float32x2_t b1 = vget_low_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) || \ - defined(SIMDE_VECTOR_SUBSCRIPT)) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private a_ = simde__m64_to_private(a); - vst1_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), a_.neon_i64); - #else - simde__m64_private* - dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), - a_ = simde__m64_to_private(a); - - dest->i64[0] = a_.i64[0]; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || defined(SIMDE_LOONGARCH_LSX_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_ASSUME_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_ps(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_ps(mem_addr, a) 
simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ - row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - } while (0) -#else - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ - SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ - row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ - row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ - row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ - row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ - } while (0) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE_H) */ -/* :: End simde/x86/sse.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - 
SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128i n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - #if defined(__ARM_FP16_FORMAT_IEEE) - SIMDE_ALIGN_TO_16 float16x8_t neon_f16; - #endif - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128i_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 
uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128d n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128d_private; - -#if 
defined(SIMDE_X86_SSE2_NATIVE) - typedef __m128i simde__m128i; - typedef __m128d simde__m128d; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int64x2_t simde__m128i; -# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - typedef float64x2_t simde__m128d; -# elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -# else - typedef simde__m128d_private simde__m128d; -# endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128i; - typedef v128_t simde__m128d; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; - #else - typedef simde__m128d_private simde__m128d; - #endif -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128i_private simde__m128i; - typedef simde__m128d_private simde__m128d; -#endif - -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - typedef simde__m128i __m128i; - typedef simde__m128d __m128d; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde__m128i_from_private(simde__m128i_private v) { - simde__m128i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i_private -simde__m128i_to_private(simde__m128i v) { - simde__m128i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde__m128d_from_private(simde__m128d_private v) { - simde__m128d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d_private -simde__m128d_to_private(simde__m128d v) { - simde__m128d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(double) - simde__m128d_to_altivec_f64(simde__m128d value) { - simde__m128d_private r_ = simde__m128d_to_private(value); - return r_.altivec_f64; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { - simde__m128d_private r_; - r_.altivec_f64 = value; - return simde__m128d_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, 
SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) - #endif - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_pd(e1, e0); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make(e0, e1); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; - r_.neon_f64 = vld1q_f64(data); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_pd(a); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_pd(a) simde_mm_set1_pd(a) - #define _mm_set_pd1(a) simde_mm_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_abs_pd(simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - simde_float64 mask_; - uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); - simde_memcpy(&mask_, &u64_, sizeof(u64_)); - return _mm_and_pd(_mm_set1_pd(mask_), a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vabsq_f64(a_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_abs(a_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_not_pd(simde__m128d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castpd_si128(a); - return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } 
- #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - mask_ = simde__m128d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vaddq_s32(a_.neon_i32, 
b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 + b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] + b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_add_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] + b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_move_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(HEDLEY_IBM_VERSION) - r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); - #else - r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); - #else - r_.f64[0] = b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif 
- - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_broadcastlow_pd(simde__m128d a) { - /* This function broadcasts the first element in the input vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_sd functions since there may be garbage in the upper lanes. */ - - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[0]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_add_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] + b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] + b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i 
-simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_and_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_and_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return 
[... vendored simde/x86/sse2.h, deleted wholesale in this hunk: the portable fallback
implementations of the SSE2 intrinsics, including _mm_and_pd / _mm_and_si128,
_mm_andnot_pd / _mm_andnot_si128, _mm_xor_pd, _mm_avg_epu8 / _mm_avg_epu16,
_mm_setzero_si128, the _mm_bslli_si128 / _mm_bsrli_si128 byte shifts, _mm_clflush,
the _mm_comieq/comige/comigt/comile/comilt/comineq_sd scalar comparisons, the
_mm_cast* reinterpret helpers, the _mm_cmpeq / _mm_cmplt / _mm_cmple / _mm_cmpgt /
_mm_cmpge / _mm_cmpngt / _mm_cmpnge / _mm_cmpnlt / _mm_cmpnle / _mm_cmpord /
_mm_cmpunord comparison family, and the _mm_cvt* / _mm_cvtt* conversion routines,
each with its NEON, AltiVec, WASM SIMD128, and scalar fallbacks. This range contains
only deletions of the vendored SIMDe shim, with no changes to project code; the
deletions continue below ...]
*/ - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = - vandq_u32( - vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), - vceqq_f32(a_.neon_f32, a_.neon_f32) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); - #endif - - r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - v128_t valid_input = - wasm_v128_and( - wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), - wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); - #endif - - r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); - #endif - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; - - __typeof__(r_.i32) valid_input = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.i32), - (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) - ); - #elif !defined(SIMDE_FAST_NANS) - __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); - #endif - - __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; - r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); - #endif - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvttsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvttsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvttsd_si64(a); - #else - return _mm_cvttsd_si64x(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); - #endif -} -#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) - #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_div_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] / b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - uint16_t r; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); - #else - r = a_.u16[imm8 & 7]; - #endif - - return HEDLEY_STATIC_CAST(int32_t, r); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) - #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m128i_private a_ = simde__m128i_to_private(a); - a_.i16[imm8 & 7] = i; - return simde__m128i_from_private(a_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load1_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load1_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); - #else - return simde_mm_set1_pd(*mem_addr); - #endif -} -#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) - #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_sd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_sd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = UINT64_C(0); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_load_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); - #else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadh_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); - #else - simde_float64 t; - - simde_memcpy(&t, mem_addr, sizeof(t)); - r_.f64[0] = a_.f64[0]; - r_.f64[1] = t; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_epi64(mem_addr); - #else - simde__m128i_private r_; - - int64_t value; - simde_memcpy(&value, mem_addr, sizeof(value)); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); - #else - r_.i64[0] = value; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_pd(a, mem_addr); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64(vld1_f64( - HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); - #else - r_.f64[0] = *mem_addr; - r_.u64[1] = a_.u64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadr_pd(mem_addr); - #else - simde__m128d_private - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); - r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t tmp = 
wasm_v128_load(mem_addr); - r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); - #else - r_.f64[0] = mem_addr[1]; - r_.f64[1] = mem_addr[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_pd(mem_addr); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld1q_f64(mem_addr); - #else - simde__m128d_private r_; - - simde_memcpy(&r_, mem_addr, sizeof(r_)); - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi8 - #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi16 - #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi32(void const * mem_addr) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi32 - #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#endif -#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm_loadu_epi64 - #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_loadu_si128 (void const* mem_addr) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); - #else - simde__m128i_private r_; - - #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_PACKED_ - struct simde_mm_loadu_si128_s { - __typeof__(r_) v; - } __attribute__((__packed__, __may_alias__)); - r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_madd_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpaddq_s32(pl, ph); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); - int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); - int32x2_t rh = 
vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); - r_.neon_i32 = vcombine_s32(rl, rh); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - int32_t SIMDE_VECTOR(32) a32, b32, p32; - SIMDE_CONVERT_VECTOR_(a32, a_.i16); - SIMDE_CONVERT_VECTOR_(b32, b_.i16); - p32 = a32 * b32; - r_.i32 = - __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + - __builtin_shufflevector(p32, p32, 1, 3, 5, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - if (mask_.u8[i] & 0x80) { - mem_addr[i] = a_.i8[i]; - } - } - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) - /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ - return _mm_movemask_epi8(a); - #else - int32_t r = 0; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ - static const uint8_t md[16] = { - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - 1 << 0, 1 << 1, 1 << 2, 1 << 3, - 1 << 4, 1 << 5, 1 << 6, 1 << 7, - }; - - /* Extend sign bit over entire lane */ - uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); - /* Clear all but the bit we're interested in. 
*/ - uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); - /* Alternate bytes from low half and high half */ - uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); - uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r = vaddvq_u16(x); - #else - uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[15 - i] >> 7) << (15 - i); - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_movemask_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_movemask_pd(a); - #else - int32_t r = 0; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); - r = - HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + - (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 
-simde_mm_movepi64_pi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movepi64_pi64(a); - #else - simde__m64_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i64 = vget_low_s64(a_.neon_i64); - #else - r_.i64[0] = a_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_movpi64_epi64 (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movpi64_epi64(a); - #else - simde__m128i_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_min_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_max_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_move_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_move_epi64(a) simde_mm_move_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t a_lo = vmovn_u64(a_.neon_u64); - uint32x2_t b_lo = vmovn_u64(b_.neon_u64); - r_.neon_u64 = vmull_u32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( - wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), - wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(a_.u32) z = { 0, }; - a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); - b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * - HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 * b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] * b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i64 = a_.i64 % b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_pd(a, b); - #else - simde__m128d_private - r_, 
- a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mul_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); - #else - r_.f64[0] = a_.f64[0] * b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_mul_su32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); - #else - r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mulhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a3210 = vget_low_s16(a_.neon_i16); - int16x4_t b3210 = vget_low_s16(b_.neon_i16); - int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); - #else - int16x4_t a7654 = vget_high_s16(a_.neon_i16); - int16x4_t b7654 = vget_high_s16(b_.neon_i16); - int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); - r_.neon_u16 = rv.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); - 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_mulhi_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t a3210 = vget_low_u16(a_.neon_u16); - uint16x4_t b3210 = vget_low_u16(b_.neon_u16); - uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); - r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - #else - uint16x4_t a7654 = vget_high_u16(a_.neon_u16); - uint16x4_t b7654 = vget_high_u16(b_.neon_u16); - uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); - r_.neon_u16 = neon_r.val[1]; - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); - const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); - r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mullo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_or_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - 
[... deletion hunk continues through the remainder of the vendored SIMDE SSE2 compatibility header, which this change removes wholesale. The deleted code is the portable emulation layer for the SSE2 intrinsics: simde_mm_or_pd / simde_mm_or_si128; simde_mm_packs_epi16 / simde_mm_packs_epi32 / simde_mm_packus_epi16; simde_mm_pause; simde_mm_sad_epu8; the simde_mm_set_epi8/16/32/64(x), simde_mm_loadu_si16/32/64, simde_x_mm_set_epu*, simde_mm_set1_* and simde_mm_setr_* constructors; simde_mm_set_sd, simde_mm_setzero_pd, simde_mm_undefined_pd / simde_mm_undefined_si128 and simde_x_mm_setone_*; the simde_mm_shuffle_epi32 / simde_mm_shuffle_pd / simde_mm_shufflehi_epi16 / simde_mm_shufflelo_epi16 permutes; the shift helpers simde_mm_sll/srl/sra_epi16/32/64 and simde_mm_slli/srli/srai_epi16/32/64; simde_mm_sqrt_pd / simde_mm_sqrt_sd; and the store family simde_mm_store_pd / store1_pd / store_sd / store_si128 / storeh_pd / storel_epi64 / storel_pd / storer_pd / storeu_pd / storeu_si128. Each function carries its native-SSE2 path plus NEON, AltiVec, WASM SIMD128, and scalar fallbacks, together with the corresponding _mm_* native-alias #define guards. ...]
defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si16(mem_addr, a); - #else - int16_t val = simde_x_mm_cvtsi128_si16(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si32(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); - #else - int32_t val = simde_mm_cvtsi128_si32(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si64(mem_addr, a); - #else - int64_t val = simde_mm_cvtsi128_si64(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_pd(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_si128(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-void -simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_si32(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_s32(mem_addr, vdupq_n_s32(a), 0); - #else - *mem_addr = a; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) - _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s64(mem_addr, vdup_n_s64(a)); - #else - *mem_addr = a; - #endif -} -#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) - #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] - b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m64 -simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] - b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] == b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] == b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] >= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] >= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > 
wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] > b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] > b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] <= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] <= b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] < b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] < b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] != b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f64[0] != b_.f64[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_lfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_lfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_lfence() simde_mm_lfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_mfence (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_mfence(); - #else - simde_mm_sfence(); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_mfence() simde_mm_mfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_high_s16(a_.neon_i16); - int16x4_t b1 = vget_high_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi16(a, b) 
simde_mm_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_high_s32(a_.neon_i32); - int32x2_t b1 = vget_high_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_h = vget_high_s64(a_.neon_i64); - int64x1_t b_h = vget_high_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_h, b_h); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi8 (simde__m128i a, 
simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { - r_.i8[(i * 2)] = a_.i8[i]; - r_.i8[(i * 2) + 1] = b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_low_s16(a_.neon_i16); - int16x4_t b1 = vget_low_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[(i * 2)] = a_.i16[i]; - r_.i16[(i * 2) + 1] = b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_low_s32(a_.neon_i32); - int32x2_t b1 = vget_low_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[(i * 2)] = a_.i32[i]; - r_.i32[(i * 2) + 1] = b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_l = vget_low_s64(a_.neon_i64); - int64x1_t b_l = vget_low_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_l, b_l); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { - r_.i64[(i * 2)] = a_.i64[i]; - r_.i64[(i * 2) + 1] = b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { - r_.f64[(i * 2)] = a_.f64[i]; - r_.f64[(i * 2) + 1] = b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_negate_pd(simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - r_.altivec_f64 = vec_neg(a_.altivec_f64); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vnegq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); - #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_not_si128 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_ternarylogic_epi32(a, a, a, 0x55); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/sse2.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i]; - r_.i16[i + halfway_point] = b_.i16[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i16[i] = a_.i16[2 * i + 1]; - r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveeven_epi32 
(simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i]; - r_.i32[i + halfway_point] = b_.i32[2 * i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.i32[i] = a_.i32[2 * i + 1]; - r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[0]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i]; - r_.f32[i + halfway_point] = b_.f32[2 * i]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); - r_.neon_f32 = t.val[1]; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f32[i] = a_.f32[2 * i + 1]; - r_.f32[i + halfway_point] = 
b_.f32[2 * i + 1]; - } - #endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i]; - r_.f64[i + halfway_point] = b_.f64[2 * i]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - for(size_t i = 0 ; i < halfway_point ; i++) { - r_.f64[i] = a_.f64[2 * i + 1]; - r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; - } - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); - float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); - return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); - #else - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; - r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_addsub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); - float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); - return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); - #else - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; - r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; - } - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_addsub_ps(a, b) simde_mm_addsub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_pd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); - #else - return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hadd_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); - #else - return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_pd(a, b); - #else - return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_hsub_ps(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); - return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); - #else - return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_lddqu_si128(mem_addr); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); - #else - simde_memcpy(&r_, mem_addr, sizeof(r_)); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loaddup_pd (simde_float64 const* mem_addr) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_loaddup_pd(mem_addr); - #else - simde__m128d_private r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(*mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); - #else - r_.f64[0] = *mem_addr; - r_.f64[1] = *mem_addr; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_movedup_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movedup_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - r_.f64[0] = a_.f64[0]; - r_.f64[1] = a_.f64[0]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_movehdup_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE3_NATIVE) - return _mm_movehdup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); - #else - r_.f32[0] = a_.f32[1]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_moveldup_ps (simde__m128 a) { - #if defined(SIMDE__SSE3_NATIVE) - return _mm_moveldup_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[0]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[2]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) -# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE3_H) */ -/* :: End simde/x86/sse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi8(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabsq_s8(a_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_abs(a_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / 
sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vabsq_s16(a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_abs(a_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_abs_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); - return _mm_sub_epi32(_mm_xor_si128(a, m), m); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vabsq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_abs(a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_PUSH - #pragma warning(disable:4146) - #endif - r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); - #if defined(_MSC_VER) - HEDLEY_DIAGNOSTIC_POP - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi8(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vabs_s8(a_.neon_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? 
(- a_.i8[i]) : a_.i8[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi16 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi16(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vabs_s16(a_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_abs_pi32 (simde__m64 a) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_abs_pi32(a); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vabs_s32(a_.neon_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? (- a_.i32[i]) : a_.i32[i]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - if (HEDLEY_UNLIKELY(count > 31)) - return simde_mm_setzero_si128(); - - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 31) { - r_.i8[i] = 0; - } else if (srcpos > 15) { - r_.i8[i] = a_.i8[(srcpos) & 15]; - } else { - r_.i8[i] = b_.i8[srcpos]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSSE3_NATIVE) - #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_alignr_epi8(a, b, count) \ - ( \ - ((count) > 31) \ - ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ - : ( \ - ((count) > 15) \ - ? 
(simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ - : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) -#endif -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) - #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) - SIMDE_REQUIRE_CONSTANT(count) { - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 15) { - r_.i8[i] = 0; - } else if (srcpos > 7) { - r_.i8[i] = a_.i8[(srcpos) & 7]; - } else { - r_.i8[i] = b_.i8[srcpos]; - } - } - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) -# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_alignr_pi8(a, b, count) \ - ( \ - ((count) > 15) \ - ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ - : ( \ - ((count) > 7) \ - ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ - : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) -#endif -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_shuffle_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Mask out the bits we're not interested in. vtbl will result in 0 - * for any values outside of [0, 15], so if the high bit is set it - * will return 0, just like in SSSE3. 
*/ - b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); - - /* Convert a from an int8x16_t to an int8x8x2_t */ - int8x8x2_t i; - i.val[0] = vget_low_s8(a_.neon_i8); - i.val[1] = vget_high_s8(a_.neon_i8); - - /* Table lookups */ - int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); - int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); - - r_.neon_i8 = vcombine_s8(l, h); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - /* This is a bit ugly because of the casts and the awful type - * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just - * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ - SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; - SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); - SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); - r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_swizzle( - a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); - } - #endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_shuffle_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); - r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); - #else - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadd_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); - #else - return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadd_epi32(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); - return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); - #else - return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadd_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); - #else - r_.i16[0] = a_.i16[0] + a_.i16[1]; - r_.i16[1] = a_.i16[2] + a_.i16[3]; - r_.i16[2] = b_.i16[0] + b_.i16[1]; - r_.i16[3] = b_.i16[2] + b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadd_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[0] + a_.i32[1]; - r_.i32[1] = b_.i32[0] + b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hadds_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); - #else - return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hadds_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); - #else - for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); - r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; - int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); - r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsub_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); - #else - return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsub_epi32(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); - return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); - #else - return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsub_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); - #else - r_.i16[0] = a_.i16[0] - a_.i16[1]; - r_.i16[1] = a_.i16[2] - a_.i16[3]; - r_.i16[2] = b_.i16[0] - b_.i16[1]; - r_.i16[3] = b_.i16[2] - b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsub_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - - SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[0] - a_.i32[1]; - r_.i32[1] = b_.i32[0] - b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_hsubs_epi16(a, b); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); - return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); - #else - return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_hsubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); - #else - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); - r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_maddubs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Zero extend a */ - int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); - int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); - - /* Sign extend by shifting left then shifting right. */ - int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); - int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); - - /* multiply */ - int16x8_t prod1 = vmulq_s16(a_even, b_even); - int16x8_t prod2 = vmulq_s16(a_odd, b_odd); - - /* saturated add */ - r_.neon_i16 = vqaddq_s16(prod1, prod2); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_maddubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); - int16x8_t bi = vmovl_s8(b_.neon_i8); - int16x8_t p = vmulq_s16(ai, bi); - int16x4_t l = vget_low_s16(p); - int16x4_t h = vget_high_s16(p); - r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_mulhrs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), - vget_low_s16(b_.neon_i16)); - int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), - vget_high_s16(b_.neon_i16)); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); - int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); - - /* Join together */ - r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); - v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); - const v128_t __inc = wasm_i32x4_splat(0x4000); - __lo = wasm_i32x4_add(__lo, __inc); - __hi = wasm_i32x4_add(__hi, __inc); - __lo = wasm_i32x4_add(__lo, __lo); - __hi = wasm_i32x4_add(__hi, __hi); - r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhrs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - 
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow = vrshrn_n_s32(mul, 15); - - /* Join together */ - r_.neon_i16 = narrow; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); - uint8x16_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s8(b_.neon_i8); - #else - bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); - #endif - bnz_mask = vmvnq_u8(bnz_mask); - - r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); - simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); - uint16x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s16(b_.neon_i16); - #else - bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); - #endif - bnz_mask = vmvnq_u16(bnz_mask); - - r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); - simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); - uint32x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s32(b_.neon_i32); - #else - bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); - #endif - bnz_mask = vmvnq_u32(bnz_mask); - - r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); - simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); - uint8x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s8(b_.neon_i8); - #else - bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); - #endif - bnz_mask = vmvn_u8(bnz_mask); - - r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); - uint16x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s16(b_.neon_i16); - #else - bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); - #endif - bnz_mask = vmvn_u16(bnz_mask); - - r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); - uint32x2_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s32(b_.neon_i32); - #else - bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); - #endif - bnz_mask = vmvn_u32(bnz_mask); - - r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/ssse3.h :: */ diff --git a/src/simde/x86/svml.h b/src/simde/x86/svml.h deleted file mode 100644 index 8387b4174..000000000 --- a/src/simde/x86/svml.h +++ /dev/null @@ -1,62308 +0,0 @@ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/svml.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_SVML_H) -#define SIMDE_X86_SVML_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/fma.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2019 Evan Nemerson - */ - -#if !defined(SIMDE_X86_FMA_H) -#define SIMDE_X86_FMA_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2018-2020 Evan Nemerson - * 2020 Michael R. 
Crusoe - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - */ - -#if !defined(SIMDE_X86_SSE_H) -#define SIMDE_X86_SSE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/mmx.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_MMX_H) -#define SIMDE_X86_MMX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-common.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -#if !defined(SIMDE_COMMON_H) -#define SIMDE_COMMON_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/hedley.h :: */ -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . 
- * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) -#if defined(HEDLEY_VERSION) -# undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 16 - -#if defined(HEDLEY_STRINGIFY_EX) -# undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -# undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -# undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(HEDLEY_CONCAT) -# undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) - -#if defined(HEDLEY_CONCAT3_EX) -# undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(HEDLEY_CONCAT3) -# undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(HEDLEY_VERSION_ENCODE) -# undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -# undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -# undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -# undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -# undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -# undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -# undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -# undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(HEDLEY_INTEL_VERSION) -# undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -# undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -# undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) -# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -# undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -# undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -# undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -# undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -# undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_ARM_VERSION) -# undef HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(HEDLEY_ARM_VERSION_CHECK) -# undef HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(HEDLEY_ARM_VERSION) -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IBM_VERSION) -# undef HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(HEDLEY_IBM_VERSION_CHECK) -# undef HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(HEDLEY_IBM_VERSION) -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_VERSION) -# undef HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -# if (__TI_COMPILER_VERSION__ >= 16000000) -# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -# endif -#endif - -#if defined(HEDLEY_TI_VERSION_CHECK) -# undef HEDLEY_TI_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_VERSION) -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION) -# undef HEDLEY_TI_CL2000_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) -# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) -# undef HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL2000_VERSION) -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION) -# undef HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) -# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
[Deleted vendored third-party header content. This hunk removes the remainder of the bundled Hedley compiler-portability header (hedley.h by Evan Nemerson, CC0-1.0): compiler version-check macros (TI, Cray, IAR, TinyC, DMC, CompCert, Pelles, MCST LCC, GCC), feature-test wrappers (__has_attribute, __has_builtin, __has_feature, __has_extension, __has_warning), diagnostic push/pop and suppression macros, and portability attribute macros (deprecated, noreturn, inline, always_inline, pure, const, restrict, visibility, nothrow, fallthrough, static_assert, and related helpers). It is followed by the SIMDE_VERSION 0.8.0 macros and the opening of simde/simde-detect-clang.h, which infers the upstream clang version from feature-test macros. The deletion of the vendored SIMDe sources continues below.]
*/ - -#if defined(SIMDE_DETECT_CLANG_VERSION) -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) -#else -# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) -# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) -#endif - -#endif /* !defined(SIMDE_DETECT_CLANG_H) */ -/* :: End simde/simde-detect-clang.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-arch.h :: */ -/* Architecture detection - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - * Different compilers define different preprocessor macros for the - * same architecture. This is an attempt to provide a single - * interface which is usable on any compiler. - * - * In general, a macro named SIMDE_ARCH_* is defined for each - * architecture the CPU supports. When there are multiple possible - * versions, we try to define the macro to the target version. For - * example, if you want to check for i586+, you could do something - * like: - * - * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) - * ... - * #endif - * - * You could also just check that SIMDE_ARCH_X86 >= 5 without checking - * if it's defined first, but some compilers may emit a warning about - * an undefined macro being used (e.g., GCC with -Wundef). - * - * This was originally created for SIMDe - * (hence the prefix), but this - * header has no dependencies and may be used anywhere. It is - * originally based on information from - * , though it - * has been enhanced with additional information. - * - * If you improve this file, or find a bug, please file the issue at - * . If you copy this into - * your project, even if you change the prefix, please keep the links - * to SIMDe intact so others know where to report issues, submit - * enhancements, and find the latest version. 
*/ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -# if defined(__alpha_ev6__) -# define SIMDE_ARCH_ALPHA 6 -# elif defined(__alpha_ev5__) -# define SIMDE_ARCH_ALPHA 5 -# elif defined(__alpha_ev4__) -# define SIMDE_ARCH_ALPHA 4 -# else -# define SIMDE_ARCH_ALPHA 1 -# endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -# define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -# define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -# if !defined(_M_ARM64EC) -# define SIMDE_ARCH_AMD64 1000 -# endif -#endif - -/* ARM - */ -#if defined(__ARM_ARCH) -# if __ARM_ARCH > 100 -# define SIMDE_ARCH_ARM (__ARM_ARCH) -# else -# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) -# endif -#elif defined(_M_ARM) -# if _M_ARM > 100 -# define SIMDE_ARCH_ARM (_M_ARM) -# else -# define SIMDE_ARCH_ARM (_M_ARM * 100) -# endif -#elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_ARM 800 -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -# define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) -#else -# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -# define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -# define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -# elif defined(SIMDE_ARCH_ARM) -# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -# endif -#endif -#if defined(__ARM_FEATURE_SVE) -# define SIMDE_ARCH_ARM_SVE -#endif -#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA -# define SIMDE_ARCH_ARM_FMA -#endif -#if defined(__ARM_FEATURE_CRYPTO) -# define SIMDE_ARCH_ARM_CRYPTO -#endif -#if defined(__ARM_FEATURE_QRDMX) -# define SIMDE_ARCH_ARM_QRDMX -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -# define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -# define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) -# define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -# define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -# define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -# define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -# define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -# define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -# define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -# define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -# define 
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
*/ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. 
*/ -#if \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ -#endif - -/* This warning needs a lot of work. It is triggered if all you do is - * pass the value to memcpy/__builtin_memcpy, or if you initialize a - * member of the union, even if that member takes up the entire union. - * Last tested with clang-10, hopefully things will improve in the - * future; if clang fixes this I'd love to enable it. */ -#if \ - HEDLEY_HAS_WARNING("-Wconditional-uninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ -#endif - -/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which - * will is false. However, SIMDe uses these operations exclusively - * for things like _mm_cmpeq_ps, for which we really do want to check - * for equality (or inequality). - * - * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro - * which just wraps a check in some code do disable this diagnostic I'd - * be happy to accept it. */ -#if \ - HEDLEY_HAS_WARNING("-Wfloat-equal") || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ -#endif - -/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. - * If Hedley can't find an implementation it will preprocess to - * nothing, which means there will be a trailing semi-colon. */ -#if HEDLEY_HAS_WARNING("-Wextra-semi") - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") -#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ -#endif - -/* We do use a few variadic macros, which technically aren't available - * until C99 and C++11, but every compiler I'm aware of has supported - * them for much longer. That said, usage is isolated to the test - * suite and compilers known to support them. */ -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) - #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#endif - -/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro - * before we can access certain SIMD intrinsics, but this diagnostic - * warns about it being a reserved name. It is a reserved name, but - * it's reserved for the compiler and we are using it to convey - * information to the compiler. - * - * This is also used when enabling native aliases since we don't get to - * choose the macro names. 
*/ -#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#endif - -/* Similar to above; types like simde__m128i are reserved due to the - * double underscore, but we didn't choose them, Intel did. */ -#if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ -#endif - -/* clang 3.8 warns about the packed attribute being unnecessary when - * used in the _mm_loadu_* functions. That *may* be true for version - * 3.8, but for later versions it is crucial in order to make unaligned - * access safe. */ -#if HEDLEY_HAS_WARNING("-Wpacked") - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ -#endif - -/* Triggered when assigning a float to a double implicitly. We use - * explicit casts in SIMDe, this is only used in the test suite. */ -#if HEDLEY_HAS_WARNING("-Wdouble-promotion") - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -/* Several compilers treat conformant array parameters as VLAs. We - * test to make sure we're in C mode (C++ doesn't support CAPs), and - * that the version of the standard supports CAPs. We also reject - * some buggy compilers like MSVC (the logic is in Hedley if you want - * to take a look), but with certain warnings enabled some compilers - * still like to emit a diagnostic. */ -#if HEDLEY_HAS_WARNING("-Wvla") - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ -#endif - -/* If you add an unused attribute to a function and don't use it, clang - * may emit this. 
*/ -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpass-failed") - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpadded") - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ -#endif - -#if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ -#endif - -#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ -#endif - -/* clang will emit this warning when we use C99 extensions whan not in - * C99 mode, even though it does support this. In such cases we check - * the compiler and version first, so we know it's not a problem. */ -#if HEDLEY_HAS_WARNING("-Wc99-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -#endif - -/* Similar problm as above; we rely on some basic C99 support, but clang - * has started warning obut this even in C17 mode with -Weverything. */ -#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ -#endif - -/* https://github.com/simd-everywhere/simde/issues/277 */ -#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ -#endif - -/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS - * to silence, but you have to do that before including anything and - * that would require reordering includes. */ -#if defined(_MSC_VER) - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ -#endif - -/* Some compilers, such as clang, may use `long long` for 64-bit - * integers, but `long long` triggers a diagnostic with - * -Wc++98-compat-pedantic which says 'long long' is incompatible with - * C++98. 
*/ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ - _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ -#endif - -/* Some problem as above */ -#if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ -#endif - -/* emscripten emits this whenever stdin/stdout/stderr is used in a - * macro. */ -#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ -#endif - -/* Clang uses C11 generic selections to implement some AltiVec - * functions, which triggers this diagnostic when not compiling - * in C11 mode */ -#if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ -#endif - -/* Clang sometimes triggers this warning in macros in the AltiVec and - * NEON headers, or due to missing functions. */ -#if HEDLEY_HAS_WARNING("-Wvector-conversion") - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") - /* For NEON, the situation with -Wvector-conversion in clang < 10 is - * bad enough that we just disable the warning altogether. On x86, - * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ - #if \ - (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ - SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ -#endif - -/* Prior to 5.0, clang didn't support disabling diagnostics in - * statement exprs. As a result, some macros we use don't - * properly silence warnings. 
*/ -#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ -#endif - -/* SLEEF triggers this a *lot* in their headers */ -#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#endif - -/* GCC emits this under some circumstances when using __int128 */ -#if HEDLEY_GCC_VERSION_CHECK(4,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -#endif - -/* MSVC doesn't like (__assume(0), code) and will warn about code being - * unreachable, but we want it there because not all compilers - * understand the unreachable macro and will complain if it is missing. - * I'm planning on adding a new macro to Hedley to handle this a bit - * more elegantly, but until then... */ -#if defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) -#elif defined(__clang__) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ -#endif - -/* This is a false positive from GCC in a few places. */ -#if HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif - -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#else - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ -#endif - -/* Some native functions on E2K with instruction set < v6 are declared - * as deprecated due to inefficiency. Still they are more efficient - * than SIMDe implementation. So we're using them, and switching off - * these deprecation warnings. 
*/ -#if defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") -#else -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS -#endif - -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ - HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ - SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ - SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ - SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ - SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ - SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ - SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ - -#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ -/* :: End simde/simde-diagnostic.h :: */ - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SVML) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) - #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) - #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BITALG) - #define SIMDE_X86_AVX512BITALG_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI) - #define SIMDE_X86_AVX512VBMI_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI2) - #define SIMDE_X86_AVX512VBMI2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VNNI) - #define SIMDE_X86_AVX512VNNI_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) - #define SIMDE_X86_AVX5124VNNIW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512CD) - #define SIMDE_X86_AVX512CD_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512DQ) - #define SIMDE_X86_AVX512DQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VL) - #define SIMDE_X86_AVX512VL_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BW) - #define SIMDE_X86_AVX512BW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && !defined(SIMDE_X86_AVX512FP16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512FP16) - #define SIMDE_X86_AVX512FP16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BF16) - #define SIMDE_X86_AVX512BF16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512F) - #define SIMDE_X86_AVX512F_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_NATIVE -#endif - -#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_FMA) - #define SIMDE_X86_FMA_NATIVE - #endif -#endif -#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX2) - #define SIMDE_X86_AVX2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX) - #define SIMDE_X86_AVX_NATIVE - #endif -#endif -#if 
[... remainder of the vendored simde/simde-features.h deletion: native-ISA detection macros for x86 (SSE through AVX, SSE3/SSSE3/SSE4.x, XOP, AES, GFNI, PCLMUL, VPCLMULQDQ, F16C, SVML), ARM NEON/SVE, WASM SIMD128 and relaxed SIMD, POWER AltiVec P5-P9, z/Arch z/Vector, MIPS MSA/Loongson MMI, and LoongArch LSX/LASX; the SIMDE_NATURAL_VECTOR_SIZE(_LE/_GE) definitions; the SIMDE_ENABLE_NATIVE_ALIASES mappings; and the IEEE 754 storage assumption ...]
-#endif /* !defined(SIMDE_FEATURES_H) */
-/* :: End simde/simde-features.h :: */
-/* :: Begin simde/simde-math.h :: */
[... vendored simde/simde-math.h deleted along with it: MIT license header, libm/cmath detection, optional SLEEF hooks, SIMDE_MATH_* constants (infinity, NaN, pi, FLT/DBL limits), simde_math_* wrappers for the C99 classification macros and math functions (isnan/isinf/fpclassify, fabs, sqrt, exp/log, trig, rounding, fma, hypot, ...), and the scalar helper approximations (cdfnorm, cdfnorminv via Acklam's rational approximation, erfinv); the deletion hunk continues below ...]
*/ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); - } - #define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfinvf(float x) { - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); - } - #define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfcinv(double x) { - if(x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - static const double p[6] = { - 0.1550470003116, - 1.382719649631, - 0.690969348887, - -1.128081391617, - 0.680544246825, - -0.16444156791 - }; - static const double q[3] = { - 0.155024849822, - 1.385228141995, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - static const double p[4] = { - 0.00980456202915, - 0.363667889171, - 0.97302949837, - -0.5374947401 - }; - static const double q[3] = { - 0.00980451277802, - 0.363699971544, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } - } - - #define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfcinvf(float x) { - if(x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = { - 0.1550470003116f, - 1.382719649631f, - 0.690969348887f, - -1.128081391617f, - 0.680544246825f - -0.164441567910f - }; - static const float q[3] = { - 0.155024849822f, - 1.385228141995f, - 1.000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = { - 0.00980456202915f, - 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f - }; - static const float q[3] = { - 0.00980451277802f, - 0.36369997154400f, - 1.00000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; - } - } - - #define simde_math_erfcinvf simde_math_erfcinvf -#endif - -static HEDLEY_INLINE -double -simde_math_rad2deg(double radians) { - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE -float -simde_math_rad2degf(float radians) { - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE -double -simde_math_deg2rad(double degrees) { - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE -float -simde_math_deg2radf(float degrees) { - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE -int8_t -simde_math_adds_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); - #else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_adds_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_adds_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_adds_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_adds_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); - #else - uint8_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_adds_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); - #else - uint16_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_adds_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); - #else - uint32_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_adds_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); - #else - uint64_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -int8_t -simde_math_subs_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); - #else - uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - case 32: result = func_name(__VA_ARGS__, 32); break; \ - case 33: result = func_name(__VA_ARGS__, 33); break; \ - case 34: result = func_name(__VA_ARGS__, 34); break; \ - case 35: result = func_name(__VA_ARGS__, 35); break; \ - case 36: result = func_name(__VA_ARGS__, 36); break; \ - case 37: result = func_name(__VA_ARGS__, 37); break; \ - case 38: result = func_name(__VA_ARGS__, 38); break; \ - case 39: result = func_name(__VA_ARGS__, 39); break; \ - case 40: result = func_name(__VA_ARGS__, 40); break; \ - case 41: result = func_name(__VA_ARGS__, 41); break; \ - case 42: result = func_name(__VA_ARGS__, 42); break; \ - case 43: result = func_name(__VA_ARGS__, 43); break; \ - case 44: result = func_name(__VA_ARGS__, 44); break; \ - case 45: result = func_name(__VA_ARGS__, 45); break; \ - case 46: result = func_name(__VA_ARGS__, 46); break; \ - case 47: result = func_name(__VA_ARGS__, 47); break; \ - case 48: result = func_name(__VA_ARGS__, 48); break; \ - case 49: result = func_name(__VA_ARGS__, 49); break; \ - case 50: result = func_name(__VA_ARGS__, 50); break; \ - case 51: result = func_name(__VA_ARGS__, 51); break; \ - case 52: result = func_name(__VA_ARGS__, 52); break; \ - case 53: result = func_name(__VA_ARGS__, 53); break; \ - case 54: result = func_name(__VA_ARGS__, 54); break; \ - case 55: result = func_name(__VA_ARGS__, 55); break; \ - case 56: result = func_name(__VA_ARGS__, 56); break; \ - case 57: result = func_name(__VA_ARGS__, 57); break; \ - case 58: result = func_name(__VA_ARGS__, 58); break; \ - case 59: result = func_name(__VA_ARGS__, 59); break; \ - case 60: result = func_name(__VA_ARGS__, 60); break; \ - case 61: result = func_name(__VA_ARGS__, 61); break; \ - case 62: 
result = func_name(__VA_ARGS__, 62); break; \ - case 63: result = func_name(__VA_ARGS__, 63); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - case 32: func_name(__VA_ARGS__, 32); break; \ - case 33: func_name(__VA_ARGS__, 33); break; \ - case 34: func_name(__VA_ARGS__, 34); break; \ - case 35: func_name(__VA_ARGS__, 35); break; \ - case 36: func_name(__VA_ARGS__, 36); break; \ - case 37: func_name(__VA_ARGS__, 37); break; \ - case 38: func_name(__VA_ARGS__, 38); break; \ 
- case 39: func_name(__VA_ARGS__, 39); break; \ - case 40: func_name(__VA_ARGS__, 40); break; \ - case 41: func_name(__VA_ARGS__, 41); break; \ - case 42: func_name(__VA_ARGS__, 42); break; \ - case 43: func_name(__VA_ARGS__, 43); break; \ - case 44: func_name(__VA_ARGS__, 44); break; \ - case 45: func_name(__VA_ARGS__, 45); break; \ - case 46: func_name(__VA_ARGS__, 46); break; \ - case 47: func_name(__VA_ARGS__, 47); break; \ - case 48: func_name(__VA_ARGS__, 48); break; \ - case 49: func_name(__VA_ARGS__, 49); break; \ - case 50: func_name(__VA_ARGS__, 50); break; \ - case 51: func_name(__VA_ARGS__, 51); break; \ - case 52: func_name(__VA_ARGS__, 52); break; \ - case 53: func_name(__VA_ARGS__, 53); break; \ - case 54: func_name(__VA_ARGS__, 54); break; \ - case 55: func_name(__VA_ARGS__, 55); break; \ - case 56: func_name(__VA_ARGS__, 56); break; \ - case 57: func_name(__VA_ARGS__, 57); break; \ - case 58: func_name(__VA_ARGS__, 58); break; \ - case 59: func_name(__VA_ARGS__, 59); break; \ - case 60: func_name(__VA_ARGS__, 60); break; \ - case 61: func_name(__VA_ARGS__, 61); break; \ - case 62: func_name(__VA_ARGS__, 62); break; \ - case 63: func_name(__VA_ARGS__, 63); break; \ - default: default_case; break; \ - } \ - } while (0) - -HEDLEY_DIAGNOSTIC_POP - -#endif -/* :: End simde/simde-constify.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-align.h :: */ -/* Alignment - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - ********************************************************************** - * - * This is portability layer which should help iron out some - * differences across various compilers, as well as various verisons of - * C and C++. - * - * It was originally developed for SIMD Everywhere - * (), but since its only - * dependency is Hedley (, also CC0) - * it can easily be used in other projects, so please feel free to do - * so. - * - * If you do use this in your project, please keep a link to SIMDe in - * your code to remind you where to report any bugs and/or check for - * updated versions. - * - * # API Overview - * - * The API has several parts, and most macros have a few variations. - * There are APIs for declaring aligned fields/variables, optimization - * hints, and run-time alignment checks. - * - * Briefly, macros ending with "_TO" take numeric values and are great - * when you know the value you would like to use. Macros ending with - * "_LIKE", on the other hand, accept a type and are used when you want - * to use the alignment of a type instead of hardcoding a value. - * - * Documentation for each section of the API is inline. - * - * True to form, MSVC is the main problem and imposes several - * limitations on the effectiveness of the APIs. Detailed descriptions - * of the limitations of each macro are inline, but in general: - * - * * On C11+ or C++11+ code written using this API will work. The - * ASSUME macros may or may not generate a hint to the compiler, but - * that is only an optimization issue and will not actually cause - * failures. - * * If you're using pretty much any compiler other than MSVC, - * everything should basically work as well as in C11/C++11. 
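For orientation while skimming this removed header: the alignment API outlined above reduces, on C++11 and later toolchains, to the standard alignas/alignof keywords plus a compiler assume-aligned hint. A minimal illustrative sketch of those underlying primitives (names invented for the example; this is not code from the removed file):

#include <cstdint>

// Fixed numeric alignment on a field (what a "_TO"-style macro wraps).
struct i32x4 {
  alignas(16) int32_t values[4];
};

// Alignment taken from another type (what a "_LIKE"-style macro wraps).
struct i32x4_storage {
  alignas(alignof(i32x4)) unsigned char bytes[sizeof(i32x4)];
};

// Optimization hint that a pointer is 16-byte aligned (what an "ASSUME"-style
// macro wraps; the GCC/Clang builtin is shown, C++20 also offers std::assume_aligned).
inline const int32_t* assume_aligned_16(const int32_t* p) {
  return static_cast<const int32_t*>(__builtin_assume_aligned(p, 16));
}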
- */ - -#if !defined(SIMDE_ALIGN_H) -#define SIMDE_ALIGN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* I know this seems a little silly, but some non-hosted compilers - * don't have stddef.h, so we try to accomodate them. */ -#if !defined(SIMDE_ALIGN_SIZE_T_) - #if defined(__SIZE_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__SIZE_T_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #else - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #endif -#endif - -#if !defined(SIMDE_ALIGN_INTPTR_T_) - #if defined(__INTPTR_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ - #elif defined(__PTRDIFF_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ - #elif defined(__PTRDIFF_T_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #else - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #endif -#endif - -#if defined(SIMDE_ALIGN_DEBUG) - #if defined(__cplusplus) - #include - #else - #include - #endif -#endif - -/* SIMDE_ALIGN_OF(Type) - * - * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or - * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. - * It isn't defined everywhere (only when the compiler has some alignof- - * like feature we can use to implement it), but it should work in most - * modern compilers, as well as C11 and C++11. - * - * If we can't find an implementation for SIMDE_ALIGN_OF then the macro - * will not be defined, so if you can handle that situation sensibly - * you may need to sprinkle some ifdefs into your code. - */ -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (0 && HEDLEY_HAS_FEATURE(c_alignof)) - #define SIMDE_ALIGN_OF(Type) _Alignof(Type) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) - #define SIMDE_ALIGN_OF(Type) alignof(Type) -#elif \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - defined(__IBM__ALIGNOF__) || \ - defined(__clang__) - #define SIMDE_ALIGN_OF(Type) __alignof__(Type) -#elif \ - HEDLEY_IAR_VERSION_CHECK(8,40,0) - #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(19,0,0) - /* Probably goes back much further, but MS takes down their old docs. - * If you can verify that this works in earlier versions please let - * me know! */ - #define SIMDE_ALIGN_OF(Type) __alignof(Type) -#endif - -/* SIMDE_ALIGN_MAXIMUM: - * - * This is the maximum alignment that the compiler supports. You can - * define the value prior to including SIMDe if necessary, but in that - * case *please* submit an issue so we can add the platform to the - * detection code. - * - * Most compilers are okay with types which are aligned beyond what - * they think is the maximum, as long as the alignment is a power - * of two. 
Older versions of MSVC is the exception, so we need to cap - * the alignment requests at values that the implementation supports. - * - * XL C/C++ will accept values larger than 16 (which is the alignment - * of an AltiVec vector), but will not reliably align to the larger - * value, so so we cap the value at 16 there. - * - * If the compiler accepts any power-of-two value within reason then - * this macro should be left undefined, and the SIMDE_ALIGN_CAP - * macro will just return the value passed to it. */ -#if !defined(SIMDE_ALIGN_MAXIMUM) - #if defined(HEDLEY_MSVC_VERSION) - #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) - // Visual studio 2017 and newer does not need a max - #else - #if defined(_M_IX86) || defined(_M_AMD64) - #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 - #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) - /* VS 2010 is really a guess based on Wikipedia; if anyone can - * test with old VS versions I'd really appreciate it. */ - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 - #else - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif - #elif defined(_M_ARM) || defined(_M_ARM64) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 - #endif - #endif - #elif defined(HEDLEY_IBM_VERSION) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif -#endif - -/* You can mostly ignore these; they're intended for internal use. - * If you do need to use them please let me know; if they fulfill - * a common use case I'll probably drop the trailing underscore - * and make them part of the public API. */ -#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) - #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 - #define SIMDE_ALIGN_64_ 32 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 - #define SIMDE_ALIGN_64_ 16 - #define SIMDE_ALIGN_32_ 16 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 - #define SIMDE_ALIGN_64_ 8 - #define SIMDE_ALIGN_32_ 8 - #define SIMDE_ALIGN_16_ 8 - #define SIMDE_ALIGN_8_ 8 - #else - #error Max alignment expected to be >= 8 - #endif -#else - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 -#endif - -/** - * SIMDE_ALIGN_CAP(Alignment) - * - * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. - */ -#if defined(SIMDE_ALIGN_MAXIMUM) - #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) -#else - #define SIMDE_ALIGN_CAP(Alignment) (Alignment) -#endif - -/* SIMDE_ALIGN_TO(Alignment) - * - * SIMDE_ALIGN_TO is used to declare types or variables. It basically - * maps to the align attribute in most compilers, the align declspec - * in MSVC, or _Alignas/alignas in C11/C++11. - * - * Example: - * - * struct i32x4 { - * SIMDE_ALIGN_TO(16) int32_t values[4]; - * } - * - * Limitations: - * - * MSVC requires that the Alignment parameter be numeric; you can't do - * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is - * unfortunate because that's really how the LIKE macros are - * implemented, and I am not aware of a way to get anything like this - * to work without using the C11/C++11 keywords. 
- * - * It also means that we can't use SIMDE_ALIGN_CAP to limit the - * alignment to the value specified, which MSVC also requires, so on - * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. - * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, - * but should be safe to use on MSVC. - * - * All this is to say that, if you want your code to work on MSVC, you - * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of - * SIMDE_ALIGN_TO(8/16/32/64). - */ -#if \ - HEDLEY_HAS_ATTRIBUTE(aligned) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) -#elif \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) - #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) - #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) - /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); - * the alignment passed to the declspec has to be an integer. */ - #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE -#endif -#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) -#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) -#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) -#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) - -/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) - * - * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's - * std::assume_aligned, or __builtin_assume_aligned. It tells the - * compiler to assume that the provided pointer is aligned to an - * `Alignment`-byte boundary. - * - * If you define SIMDE_ALIGN_DEBUG prior to including this header then - * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't - * integrate with NDEBUG in this header, but it may be a good idea to - * put something like this in your code: - * - * #if !defined(NDEBUG) - * #define SIMDE_ALIGN_DEBUG - * #endif - * #include <.../simde-align.h> - */ -#if \ - HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ - HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ - __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ - __assume_aligned(simde_assume_aligned_t_, Alignment); \ - simde_assume_aligned_t_; \ - })) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) -#else - #if defined(__cplusplus) - template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) - #else - HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) - #endif - { - HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); - return ptr; - } - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) - #else - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) - #endif -#endif - -#if !defined(SIMDE_ALIGN_DEBUG) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) -#else - #include - #if defined(__cplusplus) - template - static HEDLEY_ALWAYS_INLINE - T* - simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #else - static HEDLEY_ALWAYS_INLINE - void* - simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #endif - { - if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { - fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", - file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), - HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), - HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); - } - - return ptr; - } - - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) - #else - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) - #endif -#endif - -/* SIMDE_ALIGN_LIKE(Type) - * SIMDE_ALIGN_LIKE_#(Type) - * - * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros - * except instead of an integer they take a type; basically, it's just - * a more convenient way to do something like: - * - * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - * - * The versions with a numeric suffix will fall back 
on using a numeric - * value in the event we can't use SIMDE_ALIGN_OF(Type). This is - * mainly for MSVC, where __declspec(align()) can't handle anything - * other than hard-coded numeric values. - */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) - #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) -#else - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 -#endif - -/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) - * - * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a - * type instead of a numeric value. */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) - #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) -#endif - -/* SIMDE_ALIGN_CAST(Type, Pointer) - * - * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try - * to silence warnings that some compilers may produce if you try - * to assign to a type with increased alignment requirements. - * - * Note that it does *not* actually attempt to tell the compiler that - * the pointer is aligned like the destination should be; that's the - * job of the next macro. This macro is necessary for stupid APIs - * like _mm_loadu_si128 where the input is a __m128i* but the function - * is specifically for data which isn't necessarily aligned to - * _Alignof(__m128i). - */ -#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ - Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_r_; \ - })) -#else - #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) -#endif - -/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) - * - * This is sort of like a combination of a reinterpret_cast and a - * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell - * the compiler that the pointer is aligned like the specified type - * and casts the pointer to the specified type while suppressing any - * warnings from the compiler about casting to a type with greater - * alignment requirements. - */ -#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) - -#endif /* !defined(SIMDE_ALIGN_H) */ -/* :: End simde/simde-align.h :: */ - -/* In some situations, SIMDe has to make large performance sacrifices - * for small increases in how faithfully it reproduces an API, but - * only a relatively small number of users will actually need the API - * to be completely accurate. The SIMDE_FAST_* options can be used to - * disable these trade-offs. - * - * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or - * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to - * enable some optimizations. Using -ffast-math and/or - * -ffinite-math-only will also enable the relevant options. If you - * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) - #define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) - #if defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_NANS - #elif defined(__FINITE_MATH_ONLY__) - #if __FINITE_MATH_ONLY__ - #define SIMDE_FAST_NANS - #endif - #endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_CONVERSION_RANGE -#endif - -/* Due to differences across platforms, sometimes it can be much - * faster for us to allow spurious floating point exceptions, - * or to no generate them when we should. 
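To make the min/NaN difference described earlier in this block (_mm_min_ps vs. vminq_f32) concrete, here is a small hypothetical scalar sketch — function names invented for illustration — of the two lane semantics and of the mask-and-blend cost of reconciling them:

#include <cmath>

// x86 MINPS-style lane: the comparison is false for NaN, so the second
// operand is returned whenever either input is NaN.
static inline float min_x86_style(float a, float b) {
  return (a < b) ? a : b;
}

// NEON vminq_f32-style lane: a NaN in either input propagates to the result.
static inline float min_nan_propagating(float a, float b) {
  return (std::isnan(a) || std::isnan(b)) ? NAN : ((a < b) ? a : b);
}

// Reproducing the NaN-propagating behaviour on top of the x86-style result is
// the "build a NaN mask, do the operation, blend" overhead described above.
static inline float min_emulated(float a, float b) {
  const float r = min_x86_style(a, b);
  return (std::isnan(a) || std::isnan(b)) ? NAN : r;
}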
*/ -#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_EXCEPTIONS -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) - #if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ - (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) - #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") - #else - #define SIMDE_REQUIRE_CONSTANT(arg) - #endif -#else - #define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) - /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which - * starts with a double-underscore. This is a system header so we have no - * control over it, but since it's a macro it will emit a diagnostic which - * prevents compilation with -Werror. */ - #if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ - _Static_assert(expr, message); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) - #endif -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16,0,0) - #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#endif - -/* Statement exprs */ -#if \ - HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ - HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) -#endif - -/* This is just a convenience macro to make it easy to call a single - * function with a specific diagnostic disabled. 
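A hypothetical usage sketch of that push/ignore/pop-around-one-expression pattern (the macro and function names below are invented for illustration; the real convenience macro is defined immediately after):

// Evaluate one expression with -Wdouble-promotion silenced, using a GNU
// statement expression so the macro still yields a value (GCC/Clang only).
#define DEMO_IGNORE_DOUBLE_PROMOTION(expr)                     \
  (__extension__ ({                                            \
    _Pragma("GCC diagnostic push")                             \
    _Pragma("GCC diagnostic ignored \"-Wdouble-promotion\"")   \
    __typeof__(expr) demo_r_ = (expr);                         \
    _Pragma("GCC diagnostic pop")                              \
    demo_r_;                                                   \
  }))

static double demo_scale(float f) {
  return DEMO_IGNORE_DOUBLE_PROMOTION(f * 2.0);  // only this promotion goes unchecked
}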
*/ -#if defined(SIMDE_STATEMENT_EXPR_) - #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ - SIMDE_STATEMENT_EXPR_(({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - diagnostic \ - (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#endif - -#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) - #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") -#endif - -#if \ - (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) -# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -# define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -# if \ - HEDLEY_GCC_VERSION_CHECK(4,8,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SCALAR -# define SIMDE_VECTOR_SUBSCRIPT -# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -# define SIMDE_VECTOR_SUBSCRIPT -# elif \ - HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# elif HEDLEY_HAS_ATTRIBUTE(vector_size) -# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -# define SIMDE_VECTOR_OPS -# define SIMDE_VECTOR_NEGATE -# define SIMDE_VECTOR_SUBSCRIPT -# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) -# define SIMDE_VECTOR_SCALAR -# endif -# endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) - HEDLEY_DIAGNOSTIC_PUSH - /* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -# endif -# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) -# pragma GCC diagnostic ignored "-Wvariadic-macros" -# endif - -# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) -# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) -# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -# endif - HEDLEY_DIAGNOSTIC_POP -# endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) -# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) - /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void) from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -# else -# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) -# endif -# endif -# endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -# if defined(SIMDE_VECTOR_OPS) -# define SIMDE_VECTOR_SUBSCRIPT_OPS -# endif -# if defined(SIMDE_VECTOR_SCALAR) -# define SIMDE_VECTOR_SUBSCRIPT_SCALAR -# endif -#endif - -#if !defined(SIMDE_DISABLE_OPENMP) - #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) - #define SIMDE_ENABLE_OPENMP - #endif -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -# define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -# if defined(__clang__) -# define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) \ - HEDLEY_DIAGNOSTIC_POP -# else -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -# endif -# if !defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -# else -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) -# endif -#elif defined(SIMDE_ENABLE_CILKPLUS) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# 
define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_ALIGNED(a) -#else -# define SIMDE_VECTORIZE -# define SIMDE_VECTORIZE_SAFELEN(l) -# define SIMDE_VECTORIZE_REDUCTION(r) -# define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if defined(SIMDE_NO_INLINE) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#elif defined(SIMDE_CONSTRAINED_COMPILATION) -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static -#else -# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if \ - HEDLEY_HAS_ATTRIBUTE(unused) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ - -#if defined(_MSC_VER) -# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -# define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - HEDLEY_BEGIN_C_DECLS -# define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -# define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -# define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -# define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# elif defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__s390x__) || defined(__zarch__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. 
*/ -# elif defined(_WIN32) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(sun) || defined(__sun) /* Solaris */ -# include -# if defined(_LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(_BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__APPLE__) -# include -# if defined(__LITTLE_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BIG_ENDIAN__) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -# include -# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -# include -# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) -# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -# endif -# endif -#endif - -#if \ - HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) - #define simde_bswap64(v) _byteswap_uint64(v) -#else - SIMDE_FUNCTION_ATTRIBUTES - uint64_t - simde_bswap64(uint64_t v) { - return - ((v & (((uint64_t) 0xff) << 56)) >> 56) | - ((v & (((uint64_t) 0xff) << 48)) >> 40) | - ((v & (((uint64_t) 0xff) << 40)) >> 24) | - ((v & (((uint64_t) 0xff) << 32)) >> 8) | - ((v & (((uint64_t) 0xff) << 24)) << 8) | - ((v & (((uint64_t) 0xff) << 16)) << 24) | - ((v & (((uint64_t) 0xff) << 8)) << 40) | - ((v & (((uint64_t) 0xff) )) << 56); - } -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -# error Unknown byte order; please file a bug -#else -# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -# define simde_endian_bswap64_be(value) simde_bswap64(value) -# define simde_endian_bswap64_le(value) (value) -# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -# define simde_endian_bswap64_be(value) (value) -# define simde_endian_bswap64_le(value) simde_bswap64(value) -# endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. 
*/ - -#if !defined(SIMDE_FLOAT32_TYPE) -# define SIMDE_FLOAT32_TYPE float -# define SIMDE_FLOAT32_C(value) value##f -#else -# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -# define SIMDE_FLOAT64_TYPE double -# define SIMDE_FLOAT64_C(value) value -#else -# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if defined(SIMDE_POLY8_TYPE) -# undef SIMDE_POLY8_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY8_TYPE poly8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(poly8_t, value)) -#else -# define SIMDE_POLY8_TYPE uint8_t -# define SIMDE_POLY8_C(value) (HEDLEY_STATIC_CAST(uint8_t, value)) -#endif -typedef SIMDE_POLY8_TYPE simde_poly8; - -#if defined(SIMDE_POLY16_TYPE) -# undef SIMDE_POLY16_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define SIMDE_POLY16_TYPE poly16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(poly16_t, value)) -#else -# define SIMDE_POLY16_TYPE uint16_t -# define SIMDE_POLY16_C(value) (HEDLEY_STATIC_CAST(uint16_t, value)) -#endif -typedef SIMDE_POLY16_TYPE simde_poly16; - -#if defined(SIMDE_POLY64_TYPE) -# undef SIMDE_POLY64_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_POLY64_TYPE poly64_t -# define SIMDE_POLY64_C(value) (HEDLEY_STATIC_CAST(poly64_t, value ## ull)) -#else -# define SIMDE_POLY64_TYPE uint64_t -# define SIMDE_POLY64_C(value) value ## ull -#endif -typedef SIMDE_POLY64_TYPE simde_poly64; - -#if defined(SIMDE_POLY128_TYPE) -# undef SIMDE_POLY128_TYPE -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) -# define SIMDE_POLY128_TYPE poly128_t -# define SIMDE_POLY128_C(value) value -#elif defined(__SIZEOF_INT128__) -# define SIMDE_POLY128_TYPE __int128 -# define SIMDE_POLY128_C(value) (HEDLEY_STATIC_CAST(__int128, value)) -#else -# define SIMDE_POLY128_TYPE uint64_t -# define SIMDE_TARGET_NOT_SUPPORT_INT128_TYPE 1 -#endif -typedef SIMDE_POLY128_TYPE simde_poly128; - -#if defined(__cplusplus) - typedef bool simde_bool; -#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - typedef _Bool simde_bool; -#elif defined(bool) - typedef bool simde_bool; -#else - #include - typedef bool simde_bool; -#endif - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -# define SIMDE_CONVERT_FTOI(T,v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) \ - HEDLEY_DIAGNOSTIC_POP -#else -# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) -#else - #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) - #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) -# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -# define 
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -# define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -# define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -# if \ - defined(HEDLEY_PGI_VERSION) || \ - defined(HEDLEY_MSVC_VERSION) -# define SIMDE_STDC_HOSTED 1 -# else -# define SIMDE_STDC_HOSTED 0 -# endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) - #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) - #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) - #endif -#endif -#if !defined(simde_memset) - #if HEDLEY_HAS_BUILTIN(__builtin_memset) - #define simde_memset(s, c, n) __builtin_memset(s, c, n) - #endif -#endif -#if !defined(simde_memcmp) - #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) - #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) - #endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) - #if !defined(SIMDE_NO_STRING_H) - #if defined(__has_include) - #if !__has_include() - #define SIMDE_NO_STRING_H - #endif - #elif (SIMDE_STDC_HOSTED == 0) - #define SIMDE_NO_STRING_H - #endif - #endif - - #if !defined(SIMDE_NO_STRING_H) - #include - #if !defined(simde_memcpy) - #define simde_memcpy(dest, src, n) memcpy(dest, src, n) - #endif - #if !defined(simde_memset) - #define simde_memset(s, c, n) memset(s, c, n) - #endif - #if !defined(simde_memcmp) - #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) - #endif - #else - /* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. 
*/ - #if !defined(simde_memcpy) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memcpy_(void* dest, const void* src, size_t len) { - char* dest_ = HEDLEY_STATIC_CAST(char*, dest); - const char* src_ = HEDLEY_STATIC_CAST(const char*, src); - for (size_t i = 0 ; i < len ; i++) { - dest_[i] = src_[i]; - } - } - #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) - #endif - - #if !defined(simde_memset) - SIMDE_FUNCTION_ATTRIBUTES - void - simde_memset_(void* s, int c, size_t len) { - char* s_ = HEDLEY_STATIC_CAST(char*, s); - char c_ = HEDLEY_STATIC_CAST(char, c); - for (size_t i = 0 ; i < len ; i++) { - s_[i] = c_; - } - } - #define simde_memset(s, c, n) simde_memset_(s, c, n) - #endif - - #if !defined(simde_memcmp) - SIMDE_FUNCTION_ATTRIBUTES - int - simde_memcmp_(const void *s1, const void *s2, size_t n) { - const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1); - const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2); - for (size_t i = 0 ; i < n ; i++) { - if (s1_[i] != s2_[i]) { - return (int) (s1_[i] - s2_[i]); - } - } - return 0; - } - #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) - #endif - #endif -#endif - -/*** Functions that quiet a signaling NaN ***/ - -static HEDLEY_INLINE -double -simde_math_quiet(double x) { - uint64_t tmp, mask; - if (!simde_math_isnan(x)) { - return x; - } - simde_memcpy(&tmp, &x, 8); - mask = 0x7ff80000; - mask <<= 32; - tmp |= mask; - simde_memcpy(&x, &tmp, 8); - return x; -} - -static HEDLEY_INLINE -float -simde_math_quietf(float x) { - uint32_t tmp; - if (!simde_math_isnanf(x)) { - return x; - } - simde_memcpy(&tmp, &x, 4); - tmp |= 0x7fc00000lu; - simde_memcpy(&x, &tmp, 4); - return x; -} - -#if defined(FE_ALL_EXCEPT) - #define SIMDE_HAVE_FENV_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_FENV_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_FENV_H -#endif - -#if defined(EXIT_FAILURE) - #define SIMDE_HAVE_STDLIB_H -#elif defined(__has_include) - #if __has_include() - #include - #define SIMDE_HAVE_STDLIB_H - #endif -#elif SIMDE_STDC_HOSTED == 1 - #include - #define SIMDE_HAVE_STDLIB_H -#endif - -#if defined(__has_include) -# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -# include -# elif __has_include() -# include -# endif -# if __has_include() -# include -# endif -#elif SIMDE_STDC_HOSTED == 1 -# include -# include -#endif - -#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ - static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ - T_To \ - Name (T_From value) { \ - T_To r; \ - simde_memcpy(&r, &value, sizeof(r)); \ - return r; \ - } - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/check.h :: */ -/* Check (assertions) - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code.
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -#include - -#if !defined(_WIN32) -# define SIMDE_SIZE_MODIFIER "z" -# define SIMDE_CHAR_MODIFIER "hh" -# define SIMDE_SHORT_MODIFIER "h" -#else -# if defined(_M_X64) || defined(__amd64__) -# define SIMDE_SIZE_MODIFIER "I64" -# else -# define SIMDE_SIZE_MODIFIER "" -# endif -# define SIMDE_CHAR_MODIFIER "" -# define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) -# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -# define SIMDE_PUSH_DISABLE_MSVC_C4127_ -# define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -# if defined(__has_include) -# if __has_include() -# include -# endif -# elif defined(SIMDE_STDC_HOSTED) -# if SIMDE_STDC_HOSTED == 1 -# include -# endif -# elif defined(__STDC_HOSTED__) -# if __STDC_HOSTED__ == 1 -# include -# endif -# endif - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/debug-trap.h :: */ -/* Debugging assertions and traps - * Portable Snippets - https://github.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code.
For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -# define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -# if __has_builtin(__builtin_debugtrap) -# define simde_trap() __builtin_debugtrap() -# elif __has_builtin(__debugbreak) -# define simde_trap() __debugbreak() -# endif -#endif -#if !defined(simde_trap) -# if defined(_MSC_VER) || defined(__INTEL_COMPILER) -# define simde_trap() __debugbreak() -# elif defined(__ARMCC_VERSION) -# define simde_trap() __breakpoint(42) -# elif defined(__ibmxl__) || defined(__xlC__) -# include -# define simde_trap() __trap(42) -# elif defined(__DMC__) && defined(_M_IX86) - static inline void simde_trap(void) { __asm int 3h; } -# elif defined(__i386__) || defined(__x86_64__) - static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } -# elif defined(__thumb__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } -# elif defined(__aarch64__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } -# elif defined(__arm__) - static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } -# elif defined (__alpha__) && !defined(__osf__) - static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } -# elif defined(_54_) - static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } -# elif defined(_55_) - static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } -# elif defined(_64P_) - static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } -# elif defined(_6x_) - static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } -# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -# define simde_trap() __builtin_trap() -# else -# include -# if defined(SIGTRAP) -# define simde_trap() raise(SIGTRAP) -# else -# define simde_trap() raise(SIGABRT) -# endif -# endif -#endif - -#if defined(HEDLEY_LIKELY) -# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -# define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -# define simde_dbg_assert(expr) do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -# define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ -/* :: End simde/debug-trap.h :: */ - - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -# if defined(EOF) -# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) -# else -# define simde_errorf(format, ...) (simde_trap()) -# endif - HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -# if defined(SIMDE_CHECK_FAIL_DEFINED) -# define simde_assert(expr) -# else -# if defined(HEDLEY_ASSUME) -# define simde_assert(expr) HEDLEY_ASSUME(expr) -# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) -# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) -# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) -# define simde_assert(expr) __assume(expr) -# else -# define simde_assert(expr) -# endif -# endif -# define simde_assert_true(expr) simde_assert(expr) -# define simde_assert_false(expr) simde_assert(!(expr)) -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) -# define simde_assert_double_equal(a, b, precision) -# define simde_assert_string_equal(a, b) -# define simde_assert_string_not_equal(a, b) -# define simde_assert_memory_equal(size, a, b) -# define simde_assert_memory_not_equal(size, a, b) -#else -# define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ - -(simde_tmp_a_ - simde_tmp_b_) : \ - (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# include -# define simde_assert_string_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ - simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_string_not_equal(a, b) \ - do { \ - const char* simde_tmp_a_ = a; \ - const char* simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ - simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ - -# define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ - const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ - simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) \ - SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) \ - simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) \ - simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) \ - simde_assert_type(float, "f", a, op, b) 
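/* [Editor's note] Illustrative usage sketch, not part of the removed header:
 * it shows how the simde_assert_* convenience macros defined above expand
 * through simde_assert_type() and simde_assert_type_full(). The function name
 * simde_assert_usage_example_ is hypothetical, and the sketch assumes a hosted
 * build in which simde_errorf falls back to fprintf/abort. */
static void simde_assert_usage_example_(void) {
  int x = 6 * 7;
  /* Expands to simde_assert_type(int, "d", x, ==, 42), i.e.
   * simde_assert_type_full("", "", int, "d", x, ==, 42); when SIMDE_NDEBUG is
   * unset, a failed comparison prints a formatted message and aborts. */
  simde_assert_int(x, ==, 42);
  /* The tolerance is 1e-<precision>, so 1e-9 here; the usual 0.1 + 0.2
   * rounding error (~5.6e-17) passes comfortably. */
  simde_assert_double_equal(0.1 + 0.2, 0.3, 9);
}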
-#define simde_assert_double(a, op, b) \ - simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void*, "p", a, op, b) - -#define simde_assert_int8(a, op, b) \ - simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) \ - simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) \ - simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) \ - simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) \ - simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) \ - simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ -/* :: End simde/check.h :: */ - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether they operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * We use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long long are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long.
*/ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ - #define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ l - #define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_8_ ll - #define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ - #define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ l - #define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_16_ ll - #define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ - #define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ l - #define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_32_ ll - #define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ - #define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ l - #define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) - #define SIMDE_BUILTIN_SUFFIX_64_ ll - #define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ -HEDLEY_DIAGNOSTIC_POP - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) - #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else - #define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) - #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else - #define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) - #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else - #define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) - #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else - #define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -#if !defined(__cplusplus) - #if defined(__clang__) - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_GENERIC_(...) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") - #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ - _Generic(__VA_ARGS__); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #endif - #elif \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - HEDLEY_ARM_VERSION_CHECK(5,3,0) - #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) - #endif -#endif - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -# if defined(HEDLEY_GCC_VERSION) -# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) -# define SIMDE_BUG_GCC_REV_208793 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) -# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) -# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE -# endif -# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) -# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -# endif -# if !HEDLEY_GCC_VERSION_CHECK(7,4,0) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,3,0)) -# define SIMDE_BUG_GCC_87467 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) -# define SIMDE_BUG_GCC_REV_247851 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 -# define SIMDE_BUG_GCC_91341 -# define SIMDE_BUG_GCC_92035 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_BAD_VEXT_REV32 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(9,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && !HEDLEY_GCC_VERSION_CHECK(9,0,0)) \ - ) && defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -# define SIMDE_BUG_GCC_94482 -# endif -# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_53784 -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) -# define SIMDE_BUG_GCC_95483 -# endif -# if defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_100927 -# endif -# if !(HEDLEY_GCC_VERSION_CHECK(10,3,0)) -# define SIMDE_BUG_GCC_98521 -# endif -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_94488 -# endif -# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_REV_264019 -# endif -# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM)) -# define SIMDE_BUG_GCC_REV_260989 -# endif -# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_95399 -# define SIMDE_BUG_GCC_95471 -# define SIMDE_BUG_GCC_111609 -# elif defined(SIMDE_ARCH_POWER) -# define SIMDE_BUG_GCC_95227 -# define SIMDE_BUG_GCC_95782 -# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if 
!HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_GCC_96174 -# endif -# elif defined(SIMDE_ARCH_ZARCH) -# define SIMDE_BUG_GCC_95782 -# if HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_101614 -# endif -# endif -# if defined(SIMDE_ARCH_MIPS_MSA) -# define SIMDE_BUG_GCC_97248 -# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) -# define SIMDE_BUG_GCC_100760 -# define SIMDE_BUG_GCC_100761 -# define SIMDE_BUG_GCC_100762 -# endif -# endif -# if !defined(__OPTIMIZE__) && !(\ - HEDLEY_GCC_VERSION_CHECK(11,4,0) \ - || (HEDLEY_GCC_VERSION_CHECK(10,4,0) && !(HEDLEY_GCC_VERSION_CHECK(11,0,0))) \ - || (HEDLEY_GCC_VERSION_CHECK(9,5,0) && !(HEDLEY_GCC_VERSION_CHECK(10,0,0)))) -# define SIMDE_BUG_GCC_105339 -# endif -# elif defined(__clang__) -# if defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_CLANG_48257 // https://github.com/llvm/llvm-project/issues/47601 -# define SIMDE_BUG_CLANG_71362 // https://github.com/llvm/llvm-project/issues/71362 -# define SIMDE_BUG_CLANG_71365 // https://github.com/llvm/llvm-project/issues/71365 -# define SIMDE_BUG_CLANG_71751 // https://github.com/llvm/llvm-project/issues/71751 -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0) -# define SIMDE_BUG_CLANG_45541 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46840 -# define SIMDE_BUG_CLANG_46844 -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VI64_OPS -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) -# define SIMDE_BUG_CLANG_GIT_4EC445B8 -# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ -# endif -# endif -# if defined(SIMDE_ARCH_ARM) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) -# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES -# endif -# if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -# define SIMDE_BUG_CLANG_71763 // https://github.com/llvm/llvm-project/issues/71763 -# endif -# endif -# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_CLANG_46770 -# endif -# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) -# define SIMDE_BUG_CLANG_50893 -# define SIMDE_BUG_CLANG_50901 -# endif -# endif -# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) -# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -# endif -# if defined(SIMDE_ARCH_POWER) -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(14,0,0) -# define SIMDE_BUG_CLANG_50932 -# endif -# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) -# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS -# endif -# endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) -# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_BAD_MADD -# endif -# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) -# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ -# endif -# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) -# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ -# endif -# if 
HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 -# endif -# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_44589 -# endif -# define SIMDE_BUG_CLANG_48673 // https://github.com/llvm/llvm-project/issues/48017 -# endif -# define SIMDE_BUG_CLANG_45959 // https://github.com/llvm/llvm-project/issues/45304 -# if defined(SIMDE_ARCH_WASM_SIMD128) && !SIMDE_DETECT_CLANG_VERSION_CHECK(17,0,0) -# define SIMDE_BUG_CLANG_60655 -# endif -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif -# elif defined(HEDLEY_INTEL_VERSION) -# define SIMDE_BUG_INTEL_857088 -# elif defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS -# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 -# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT -# elif defined(HEDLEY_PGI_VERSION) -# define SIMDE_BUG_PGI_30104 -# define SIMDE_BUG_PGI_30107 -# define SIMDE_BUG_PGI_30106 -# endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if \ - (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ - HEDLEY_GCC_VERSION_CHECK(4,3,0) -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ - __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -/* Usually the shift count is signed (for example, NEON or SSE). - * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. 
- * Further info: https://github.com/simd-everywhere/simde/pull/700 - */ -#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) -#else - #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) -#endif - -/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_COMMON_H) */ -/* :: End simde/simde-common.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - #include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - #include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; - #endif - - #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; - #endif - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; - #endif - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; - #endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES - typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); -#endif

[... the remainder of the vendored, auto-generated simde/x86/mmx.h deletion is elided here: the simde__m64 <-> simde__m64_private conversion helpers, the SIMDE_X86_GENERATE_CONVERSION_FUNCTION macro with its NEON and Loongson-MMI instantiations, and the portable fallbacks plus _m_* alias macros for the MMX intrinsics (add/adds, and/andnot/or/xor, cmpeq/cmpgt, cvt* conversions, empty, madd/mulhi/mullo, packs, set/set1/setr/setzero and load/store helpers, sll/slli/srl/srli/sra/srai shifts, sub/subs, unpackhi/unpacklo, m_to_int). All of it is deleted along with the rest of the bundled SIMDE headers. ...]

-SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_MMX_H) */ -/* :: End simde/x86/mmx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-f16.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do
so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if !defined(SIMDE_FLOAT16_H) -#define SIMDE_FLOAT16_H - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* Portable version which should work on pretty much any compiler. - * Obviously you can't rely on compiler support for things like - * conversion to/from 32-bit floats, so make sure you always use the - * functions and macros in this file! - * - * The portable implementations are (heavily) based on CC0 code by - * Fabian Giesen: (see also - * ). - * I have basically just modified it to get rid of some UB (lots of - * aliasing, right shifting a negative value), use fixed-width types, - * and work in C. */ -#define SIMDE_FLOAT16_API_PORTABLE 1 -/* _Float16, per C standard (TS 18661-3; - * ). */ -#define SIMDE_FLOAT16_API_FLOAT16 2 -/* clang >= 6.0 supports __fp16 as an interchange format on all - * targets, but only allows you to use them for arguments and return - * values on targets which have defined an ABI. We get around the - * restriction by wrapping the __fp16 in a struct, but we can't do - * that on Arm since it would break compatibility with the NEON F16 - * functions. */ -#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 -/* This is basically __fp16 as specified by Arm, where arugments and - * return values are raw __fp16 values not structs. */ -#define SIMDE_FLOAT16_API_FP16 4 - -/* Choosing an implementation. This is a bit rough, but I don't have - * any ideas on how to improve it. If you do, patches are definitely - * welcome. */ -#if !defined(SIMDE_FLOAT16_API) - #if defined(__ARM_FP16_FORMAT_IEEE) && (defined(SIMDE_ARM_NEON_FP16) || defined(__ARM_FP16_ARGS)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 - #elif !defined(__EMSCRIPTEN__) && !(defined(__clang__) && defined(SIMDE_ARCH_POWER)) && \ - !(defined(HEDLEY_MSVC_VERSION) && defined(__clang__)) && \ - !(defined(SIMDE_ARCH_MIPS) && defined(__clang__)) && \ - !(defined(__clang__) && defined(SIMDE_ARCH_RISCV64)) && ( \ - defined(SIMDE_X86_AVX512FP16_NATIVE) || \ - (defined(SIMDE_ARCH_X86_SSE2) && HEDLEY_GCC_VERSION_CHECK(12,0,0)) || \ - (defined(SIMDE_ARCH_AARCH64) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !defined(__cplusplus)) || \ - ((defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0)) || \ - (!(defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(6,0,0))) - /* We haven't found a better way to detect this. 
It seems like defining - * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then - * checking for defined(FLT16_MAX) should work, but both gcc and - * clang will define the constants even if _Float16 is not - * supported. Ideas welcome. */ - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 - #elif defined(__FLT16_MIN__) && \ - (defined(__clang__) && \ - (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) \ - && !defined(SIMDE_ARCH_RISCV64)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI - #else - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE - #endif -#endif - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 - typedef _Float16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #if !defined(__cplusplus) - #define SIMDE_FLOAT16_C(value) value##f16 - #else - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(_Float16, (value)) - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - typedef struct { __fp16 value; } simde_float16; - #if defined(SIMDE_STATEMENT_EXPR_) && !defined(SIMDE_TESTS_H) - #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) - #else - #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) - #define SIMDE_FLOAT16_IS_SCALAR 1 - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 - typedef __fp16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - typedef struct { uint16_t value; } simde_float16; -#else - #error No 16-bit floating point API. -#endif - -#if \ - defined(SIMDE_VECTOR_OPS) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) - #define SIMDE_FLOAT16_VECTOR -#endif - -/* Reinterpret -- you *generally* shouldn't need these, they're really - * intended for internal use. However, on x86 half-precision floats - * get stuffed into a __m128i/__m256i, so it may be useful. 
*/ - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - #define SIMDE_NANHF simde_uint16_as_float16(0x7E00) // a quiet Not-a-Number - #define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) - #define SIMDE_NINFINITYHF simde_uint16_as_float16(0xFC00) -#else - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF SIMDE_FLOAT16_C(__builtin_nanf16("")) - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_FLOAT16_C(SIMDE_MATH_NAN) - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(__builtin_inf16()) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-__builtin_inf16()) - #else - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-SIMDE_MATH_INFINITY) - #endif - #else - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF __builtin_nanf16("") - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_MATH_NAN - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF __builtin_inf16() - #define SIMDE_NINFINITYHF -(__builtin_inf16()) - #else - #define SIMDE_INFINITYHF HEDLEY_STATIC_CAST(simde_float16, SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF HEDLEY_STATIC_CAST(simde_float16, -SIMDE_MATH_INFINITY) - #endif - #endif -#endif - -/* Conversion -- convert between single-precision and half-precision - * floats. */ -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float16 -simde_float16_from_float32 (simde_float32 value) { - simde_float16 res; - - #if \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) - res = HEDLEY_STATIC_CAST(simde_float16, value); - #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) - res.value = HEDLEY_STATIC_CAST(__fp16, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint32_t f32u = simde_float32_as_uint32(value); - static const uint32_t f32u_infty = UINT32_C(255) << 23; - static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; - static const uint32_t denorm_magic = - ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; - uint16_t f16u; - - uint32_t sign = f32u & (UINT32_C(1) << 31); - f32u ^= sign; - - /* NOTE all the integer compares in this function cast the operands - * to signed values to help compilers vectorize to SSE2, which lacks - * unsigned comparison instructions. This is fine since all - * operands are below 0x80000000 (we clear the sign bit). */ - - if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ - f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ - } else { /* (De)normalized number or zero */ - if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ - /* use a magic value to align our 10 mantissa bits at the bottom of - * the float. as long as FP addition is round-to-nearest-even this - * just works. */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); - - /* and one integer subtract of the bias later, we have our final float! 
*/ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); - } else { - uint32_t mant_odd = (f32u >> 13) & 1; - - /* update exponent, rounding bias part 1 */ - f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); - /* rounding bias part 2 */ - f32u += mant_odd; - /* take the bits! */ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); - } - } - - f16u |= sign >> 16; - res = simde_uint16_as_float16(f16u); - #endif - - return res; -} - -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float32 -simde_float16_to_float32 (simde_float16 value) { - simde_float32 res; - - #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) - res = HEDLEY_STATIC_CAST(simde_float32, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint16_t half = simde_float16_as_uint16(value); - const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); - const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ - uint32_t f32u; - - f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ - uint32_t exp = shifted_exp & f32u; /* just the exponent */ - f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ - - /* handle exponent special cases */ - if (exp == shifted_exp) /* Inf/NaN? */ - f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ - else if (exp == 0) { /* Zero/Denormal? */ - f32u += (1) << 23; /* extra exp adjust */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ - } - - f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ - res = simde_uint32_as_float32(f32u); - #endif - - return res; -} - -#ifdef SIMDE_FLOAT16_C - #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) -#else - #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) -#endif - -#if !defined(simde_isinfhf) && defined(simde_math_isinff) - #define simde_isinfhf(a) simde_math_isinff(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnanhf) && defined(simde_math_isnanf) - #define simde_isnanhf(a) simde_math_isnanf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnormalhf) && defined(simde_math_isnormalf) - #define simde_isnormalhf(a) simde_math_isnormalf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_issubnormalhf) && defined(simde_math_issubnormalf) - #define simde_issubnormalhf(a) simde_math_issubnormalf(simde_float16_to_float32(a)) -#endif - -#define simde_fpclassifyhf(a) simde_math_fpclassifyf(simde_float16_to_float32(a)) - -static HEDLEY_INLINE -uint8_t -simde_fpclasshf(simde_float16 v, const int imm8) { - uint16_t bits = simde_float16_as_uint16(v); - uint8_t negative = (bits >> 15) & 1; - uint16_t const ExpMask = 0x7C00; // [14:10] - uint16_t const MantMask = 0x03FF; // [9:0] - uint8_t exponent_all_ones = ((bits & ExpMask) == ExpMask); - uint8_t exponent_all_zeros = ((bits & ExpMask) == 0); - uint8_t mantissa_all_zeros = ((bits & MantMask) == 0); - uint8_t zero = exponent_all_zeros & mantissa_all_zeros; - uint8_t signaling_bit = (bits >> 9) & 1; - - uint8_t result = 0; - uint8_t snan = exponent_all_ones & (!mantissa_all_zeros) & (!signaling_bit); - uint8_t qnan = exponent_all_ones & (!mantissa_all_zeros) & signaling_bit; - uint8_t positive_zero = (!negative) & zero; - uint8_t negative_zero = negative & zero; - uint8_t positive_infinity = (!negative) & exponent_all_ones & mantissa_all_zeros; - uint8_t negative_infinity = negative & exponent_all_ones & mantissa_all_zeros; - uint8_t 
denormal = exponent_all_zeros & (!mantissa_all_zeros); - uint8_t finite_negative = negative & (!exponent_all_ones) & (!zero); - result = (((imm8 >> 0) & qnan) | \ - ((imm8 >> 1) & positive_zero) | \ - ((imm8 >> 2) & negative_zero) | \ - ((imm8 >> 3) & positive_infinity) | \ - ((imm8 >> 4) & negative_infinity) | \ - ((imm8 >> 5) & denormal) | \ - ((imm8 >> 6) & finite_negative) | \ - ((imm8 >> 7) & snan)); - return result; -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_FLOAT16_H) */ -/* :: End simde/simde-f16.h :: */ - -#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) - #define NOMINMAX - #include -#endif - -#if defined(__ARM_ACLE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_ALIGN_TO_16 __m128 n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v16i8 lsx_i8; - v8i16 lsx_i16; - v4i32 lsx_i32; - v2i64 lsx_i64; - v16u8 lsx_u8; - v8u16 lsx_u16; - v4u32 lsx_u32; - v2u64 lsx_u64; - v4f32 lsx_f32; - v2f64 lsx_f64; - #endif -} simde__m128_private; - -#if defined(SIMDE_X86_SSE_NATIVE) - typedef __m128 simde__m128; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef float32x4_t simde__m128; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; -#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - typedef v4f32 simde__m128; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128_private simde__m128; -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - typedef simde__m128 __m128; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde__m128_from_private(simde__m128_private v) { - simde__m128 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128_private -simde__m128_to_private(simde__m128 v) { - simde__m128_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(float) - simde__m128_to_altivec_f32(simde__m128 value) { - simde__m128_private r_ = simde__m128_to_private(value); - return r_.altivec_f32; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { - simde__m128_private r_; - r_.altivec_f32 = value; - return simde__m128_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); -#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ - -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) -#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ - -enum { - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, - SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, - SIMDE_MM_ROUND_UP = _MM_ROUND_UP, - SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO - #else - SIMDE_MM_ROUND_NEAREST = 0x0000, - SIMDE_MM_ROUND_DOWN = 0x2000, - SIMDE_MM_ROUND_UP = 0x4000, - SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 - #endif -}; -#if defined(_MM_ROUND_MASK) -# define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK -#else -# define SIMDE_MM_ROUND_MASK (0x6000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK -#endif - -#if defined(_MM_FROUND_TO_NEAREST_INT) -# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT -# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF -# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF -# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO -# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION - -# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC -# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC -#else -# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 -# define 
SIMDE_MM_FROUND_TO_NEG_INF 0x01 -# define SIMDE_MM_FROUND_TO_POS_INF 0x02 -# define SIMDE_MM_FROUND_TO_ZERO 0x03 -# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 - -# define SIMDE_MM_FROUND_RAISE_EXC 0x00 -# define SIMDE_MM_FROUND_NO_EXC 0x08 -#endif - -#define SIMDE_MM_FROUND_NINT \ - (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_FLOOR \ - (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_CEIL \ - (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_TRUNC \ - (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_RINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_NEARBYINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) - -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) -# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT -# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF -# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF -# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO -# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION -# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC -# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT -# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR -# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL -# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC -# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT -# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT -#endif - -#if defined(_MM_EXCEPT_INVALID) -# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID -#else -# define SIMDE_MM_EXCEPT_INVALID (0x0001) -#endif -#if defined(_MM_EXCEPT_DENORM) -# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM -#else -# define SIMDE_MM_EXCEPT_DENORM (0x0002) -#endif -#if defined(_MM_EXCEPT_DIV_ZERO) -# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO -#else -# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) -#endif -#if defined(_MM_EXCEPT_OVERFLOW) -# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW -#else -# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) -#endif -#if defined(_MM_EXCEPT_UNDERFLOW) -# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW -#else -# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) -#endif -#if defined(_MM_EXCEPT_INEXACT) -# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT -#else -# define SIMDE_MM_EXCEPT_INEXACT (0x0020) -#endif -#if defined(_MM_EXCEPT_MASK) -# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK -#else -# define SIMDE_MM_EXCEPT_MASK \ - (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ - SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ - SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID - #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM - #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO - #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW - #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW - #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT - #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK -#endif - -#if defined(_MM_MASK_INVALID) -# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID -#else -# define SIMDE_MM_MASK_INVALID (0x0080) -#endif -#if defined(_MM_MASK_DENORM) -# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM -#else -# define SIMDE_MM_MASK_DENORM (0x0100) -#endif -#if defined(_MM_MASK_DIV_ZERO) -# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO -#else -# define 
SIMDE_MM_MASK_DIV_ZERO (0x0200) -#endif -#if defined(_MM_MASK_OVERFLOW) -# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW -#else -# define SIMDE_MM_MASK_OVERFLOW (0x0400) -#endif -#if defined(_MM_MASK_UNDERFLOW) -# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW -#else -# define SIMDE_MM_MASK_UNDERFLOW (0x0800) -#endif -#if defined(_MM_MASK_INEXACT) -# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT -#else -# define SIMDE_MM_MASK_INEXACT (0x1000) -#endif -#if defined(_MM_MASK_MASK) -# define SIMDE_MM_MASK_MASK _MM_MASK_MASK -#else -# define SIMDE_MM_MASK_MASK \ - (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ - SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ - SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID - #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM - #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO - #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW - #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW - #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT - #define _MM_MASK_MASK SIMDE_MM_MASK_MASK -#endif - -#if defined(_MM_FLUSH_ZERO_MASK) -# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK -#else -# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_ON) -# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON -#else -# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_OFF) -# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF -#else -# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK - #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON - #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_ROUNDING_MODE(void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _MM_GET_ROUNDING_MODE(); - #elif defined(SIMDE_HAVE_FENV_H) - unsigned int vfe_mode; - - switch (fegetround()) { - #if defined(FE_TONEAREST) - case FE_TONEAREST: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case FE_TOWARDZERO: - vfe_mode = SIMDE_MM_ROUND_DOWN; - break; - #endif - - #if defined(FE_UPWARD) - case FE_UPWARD: - vfe_mode = SIMDE_MM_ROUND_UP; - break; - #endif - - #if defined(FE_DOWNWARD) - case FE_DOWNWARD: - vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; - break; - #endif - - default: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - } - - return vfe_mode; - #else - return SIMDE_MM_ROUND_NEAREST; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_ROUNDING_MODE(a); - #elif defined(SIMDE_HAVE_FENV_H) - int fe_mode = FE_TONEAREST; - - switch (a) { - #if defined(FE_TONEAREST) - case SIMDE_MM_ROUND_NEAREST: - fe_mode = FE_TONEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case SIMDE_MM_ROUND_TOWARD_ZERO: - fe_mode = FE_TOWARDZERO; - break; - #endif - - #if defined(FE_DOWNWARD) - case SIMDE_MM_ROUND_DOWN: - fe_mode = FE_DOWNWARD; - break; - #endif - - #if defined(FE_UPWARD) - case SIMDE_MM_ROUND_UP: - fe_mode = FE_UPWARD; - break; - #endif - - default: - return; - } - - fesetround(fe_mode); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. 
*/ - #if \ - defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_f32 = vrndiq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndnq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndmq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); - #elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndpq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); - #elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndq_f32(a_.neon_f32); 
- #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); - #elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) -#else - #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps(e3, e2, e1, e0); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; - r_.neon_f32 = vld1q_f32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps1 (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps1(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - (void) a; - return vec_splats(a); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - return (simde__m128)__lsx_vldrepl_w(&a, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_splat(a); - #else - return simde_mm_set_ps(a, a, a, a); - #endif -} -#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps1(a) simde_mm_set_ps1(a) -# define _mm_set1_ps(a) simde_mm_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_move_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_move_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; - r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); - #else - r_.f32[0] = b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_broadcastlow_ps(simde__m128 a) { - /* This function broadcasts the first element in the inpu vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_ss functions since there may be garbage in the upper lanes. */ - - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_shuffle_ps(a, a, 0); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - // the upper values in the result must be the remnants of . 
- r_.neon_f32 = vaddq_f32(a_.neon_f32, value); - #else - r_.f32[0] = a_.f32[0] + b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_and_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_and_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_andnot_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_xor_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_xor_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_or_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_or_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_not_ps(simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* Note: we use ints instead of floats because we don't want cmpeq - * to return false for (NaN, NaN) */ - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
[... deletion hunk continues for the vendored SIMDE SSE portability header: the remaining simde_mm_* compare, comi, convert, load, maskmove, min/max, and move shims are removed ...]
simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_pi8 (simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); - #else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < nmemb ; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } - #endif - - return r; - #endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_movemask_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movemask_ps(a); - #else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift[4] = {0, 1, 2, 3}; - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(vshlq_u32(tmp, vld1q_s32(shift)))); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. 
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] == b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] == b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] >= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] >= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] > b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] > b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] <= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] <= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] < b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] < b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] != b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] != b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) -# if defined(__has_builtin) -# if __has_builtin(__builtin_ia32_undef128) -# define SIMDE_HAVE_UNDEFINED128 -# endif -# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) -# define SIMDE_HAVE_UNDEFINED128 -# endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpackhi_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_high_f32(a_.neon_f32); - float32x2_t b1 = vget_high_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = 
[... deletion of the bundled SIMDe headers continues: the remainder of simde/x86/sse.h and the opening portions of the auto-generated, MIT-licensed simde/x86/avx.h, sse4.2.h, sse4.1.h, ssse3.h, sse3.h, and sse2.h (vendored third-party SIMD portability shims) are removed; the verbatim header text is omitted here ...]
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_MIPS_MSA_NATIVE) - r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpgt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpge_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpge_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpngt_pd(a, b); - #else - return simde_mm_cmple_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpngt_sd(a, b); - #else - return simde_mm_cmple_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnge_pd(a, b); - #else - return simde_mm_cmplt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpnge_sd(a, b); - #else - return simde_mm_cmplt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_pd(a, b); - #else - return simde_mm_cmpge_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_sd(a, b); - #else - return simde_mm_cmpge_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnle_pd(a, b); - #else - return simde_mm_cmpgt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return 
_mm_cmpnle_sd(a, b); - #else - return simde_mm_cmpgt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vandq_u64(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm_cvtsd_f64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cvtsd_f64(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); - #else - return a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_pd(a); - #else - simde__m128d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtepi32_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_ps(a); - #else - simde__m128_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #pragma clang diagnostic ignored "-Wc11-extensions" - #endif - r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = simde_math_round(a_.f64[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) - return _mm_cvtpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvtpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) - float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; - r_.f32 = - __builtin_shufflevector( - __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, - 0, 1, 2, 3 - ); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); - r_.f32[2] = SIMDE_FLOAT32_C(0.0); - r_.f32[3] = SIMDE_FLOAT32_C(0.0); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtpi32_pd (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_pd(a); - #else - simde__m128d_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) - a_ = simde__m128_to_private(a); - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - #else - a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_pd(a); - #else - simde__m128d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 v = simde_math_round(a_.f64[0]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsd_si64x(a); - #else - return _mm_cvtsd_si64(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); - #endif -} -#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) - #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); - - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i]; - } - #endif - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_x_mm_cvtsi128_si16 (simde__m128i a) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s16(a_.neon_i16, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i16, 0); - #else - return a_.i16[0]; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi128_si32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi128_si32(a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s32(a_.neon_i32, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i32, 0); - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsi128_si64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsi128_si64x(a); - #else - return _mm_cvtsi128_si64(a); - #endif - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); - #endif - return a_.i64[0]; - #endif -} -#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) - #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_sd(a, b); - #else - simde__m128d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.i64[1] = a_.i64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cvtsi16_si128 (int16_t a) { - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); - #else - r_.i16[0] = a; - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - r_.i16[4] = 0; - r_.i16[5] = 0; - r_.i16[6] = 0; - r_.i16[7] = 0; - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi32_si128 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_si128(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_sd(a, b); - #else - return _mm_cvtsi64x_sd(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) - #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi64_si128 (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_si128(a); - #else - return _mm_cvtsi64x_si128(a); - #endif - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i64x2_make(a, 0); - #else - r_.i64[0] = a; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) - #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtss_sd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); - return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); - - return simde__m128d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvttpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvttpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = a_.f64[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvttpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - /* Values below INT32_MIN saturate anyways, so we don't need to - * test for that. 
*/ - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = - vandq_u32( - vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), - vceqq_f32(a_.neon_f32, a_.neon_f32) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); - #endif - - r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - v128_t valid_input = - wasm_v128_and( - wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), - wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); - #endif - - r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); - #endif - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; - - __typeof__(r_.i32) valid_input = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.i32), - (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) - ); - #elif !defined(SIMDE_FAST_NANS) - __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); - #endif - - __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; - r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); - #endif - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvttsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
[Deletion of the vendored SIMDE SSE2 emulation header (SIMDE's x86/sse2.h) continues through this span. The removed hunks are the portable fallback implementations of _mm_cvttsd_si32/_mm_cvttsd_si64, _mm_div_pd/_mm_div_sd, _mm_extract_epi16/_mm_insert_epi16, the _mm_load*/_mm_loadu* family (including _mm_loadu_epi8/16/32/64 and _mm_loadu_si16/32/64/128), _mm_madd_epi16, _mm_maskmoveu_si128, _mm_movemask_epi8/_mm_movemask_pd, _mm_movepi64_pi64/_mm_movpi64_epi64, _mm_move_epi64, the _mm_min_*/_mm_max_* family, _mm_mul_epu32/_mm_mul_pd/_mm_mul_sd/_mm_mul_su32, _mm_mulhi_epi16/_mm_mulhi_epu16/_mm_mullo_epi16, _mm_or_pd/_mm_or_si128, _mm_packs_epi16/_mm_packs_epi32/_mm_packus_epi16, _mm_pause, _mm_sad_epu8, and the _mm_set*/_mm_set1*/_mm_setr* constructors. Every removed wrapper follows the same dispatch pattern: the native x86 intrinsic when compiled with SSE2, otherwise a NEON, WASM SIMD, or AltiVec/z-vector path, otherwise a plain scalar loop.]
#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi32(e3, e2, e1, e0); - #else - return simde_mm_set_epi32(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_epi64(e1, e0); - #else - return simde_mm_set_epi64(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_pd(e1, e0); - #else - return simde_mm_set_pd(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setzero_pd (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_pd(); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); - #else - return simde_mm_castsi128_pd(simde_mm_setzero_si128()); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_pd() simde_mm_setzero_pd() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_undefined_pd (void) { - simde__m128d_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_pd(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_pd() simde_mm_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_undefined_si128 (void) { - simde__m128i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_si128(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_si128() (simde_mm_undefined_si128()) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_setone_pd (void) { - return simde_mm_castps_pd(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_setone_si128 (void) { - return simde_mm_castps_si128(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = 
simde__m128i_to_private(a); \ - simde__m128i_from_wasm_v128( \ - wasm_i32x4_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3)); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_epi32(a, imm8) \ - (__extension__ ({ \ - const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ - int32x4_t simde_mm_shuffle_epi32_r_; \ - simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ - vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - (simde_tmp_a_).i32, \ - (simde_tmp_a_).i32, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; - r_.f64[1] = ((imm8 & 2) == 0) ? 
b_.f64[0] : b_.f64[1]; - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ - simde__m128d_from_private((simde__m128d_private) { .f64 = \ - SIMDE_SHUFFLE_VECTOR_(64, 16, \ - simde__m128d_to_private(a).f64, \ - simde__m128d_to_private(b).f64, \ - (((imm8) ) & 1), \ - (((imm8) >> 1) & 1) + 2) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[i]; - } - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflehi_epi16(a, imm8) \ - (__extension__ ({ \ - int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ - simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ - wasm_i16x8_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = 
simde__m128i_to_private(a); - - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; - } - SIMDE_VECTORIZE - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) \ - simde__m128i_from_wasm_v128( \ - wasm_i16x8_shuffle( \ - simde__m128i_to_wasm_v128((a)), \ - wasm_i16x8_splat(0), \ - (((imm8) & 0x03) ), \ - (((imm8) & 0x0c) >> 2), \ - (((imm8) & 0x30) >> 4), \ - (((imm8) & 0xc0) >> 6), \ - 4, 5, 6, 7)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflelo_epi16(a, imm8) \ - (__extension__({ \ - int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ - simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), \ - 4, 5, 6, 7) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 15) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = (a_.u16 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? 
wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 31) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = (a_.u32 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 63) - return simde_mm_setzero_si128(); - - const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] << s; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsqrtq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_sqrt(a_.altivec_f64); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_sqrt) - r_.f64[0] = simde_math_sqrt(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~15) ? 15 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~31) ? 
31 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sra_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) - return _mm_sra_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - (((imm8) <= 0) ? 
\ - (a) : \ - simde__m128i_from_neon_i16( \ - ((imm8) > 15) ? \ - vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ - vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i32( \ - ((imm8) > 31) ? \ - vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ - vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sl(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 63))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i64( \ - ((imm8) > 63) ? \ - vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ - vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u16( \ - ((imm8) > 15) ? \ - vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ - vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u32( \ - ((imm8) > 31) ? \ - vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ - vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sr(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); - #else - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u64( \ - ((imm8) > 63) ? \ - vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ - vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store1_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); - #else - mem_addr[0] = a_.f64[0]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) - #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_sd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); - simde_memcpy(mem_addr, &v, sizeof(v)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); - simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde_float64 v = a_.f64[0]; - simde_memcpy(mem_addr, &v, sizeof(simde_float64)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void - simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeh_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - *mem_addr = a_.f64[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int64_t tmp; - - /* memcpy to prevent aliasing, tmp because we can't take the - * address of a vector element. */ - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - tmp = vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - tmp = vec_extract(a_.altivec_i64, 0); - #else - tmp = a_.i64[0]; - #endif - - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_pd(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 tmp; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - tmp = vgetq_lane_f64(a_.neon_f64, 0); - #else - tmp = a_.f64[0]; - #endif - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storer_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #else - mem_addr[0] = a_.f64[1]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si16(mem_addr, a); - #else - int16_t val = simde_x_mm_cvtsi128_si16(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si32(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); - #else - int32_t val = simde_mm_cvtsi128_si32(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si64(mem_addr, a); - #else - int64_t val = simde_mm_cvtsi128_si64(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_pd(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_si128(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-void -simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_si32(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_s32(mem_addr, vdupq_n_s32(a), 0); - #else - *mem_addr = a; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) - _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s64(mem_addr, vdup_n_s64(a)); - #else - *mem_addr = a; - #endif -} -#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) - #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); - #elif 
[... remainder of the deleted vendored SIMDE single-header bundle omitted for brevity; the original newlines of this deletion were lost in extraction. The removed hunks cover:
    * the rest of simde/x86/sse2.h -- simde_mm_sub_epi32/epi64, simde_x_mm_sub_epu32, simde_mm_sub_pd/sd/si64, the saturating subtracts (simde_mm_subs_epi8/epi16/epu8/epu16), the unordered scalar-double comparisons (simde_mm_ucomieq/ge/gt/le/lt/neq_sd), simde_mm_lfence/mfence, the unpackhi/unpacklo family (epi8/epi16/epi32/epi64/pd), simde_x_mm_negate_pd, simde_mm_xor_si128, simde_x_mm_not_si128, and SIMDE_MM_SHUFFLE2;
    * all of simde/x86/sse3.h -- the simde_x_mm_deinterleave{even,odd} helpers (epi16/epi32/ps/pd), simde_mm_addsub_pd/ps, simde_mm_hadd_pd/ps, simde_mm_hsub_pd/ps, simde_mm_lddqu_si128, simde_mm_loaddup_pd, simde_mm_movedup_pd, simde_mm_movehdup_ps, simde_mm_moveldup_ps;
    * the SSSE3 section up to the point where this listing is truncated -- simde_mm_abs_epi8/epi16/epi32 and simde_mm_abs_pi8/pi16/pi32, simde_mm_alignr_epi8/pi8, simde_mm_shuffle_epi8/pi8, the horizontal add/subtract family (simde_mm_hadd/hadds/hsub/hsubs for epi16/epi32 and pi16/pi32), and the beginning of simde_mm_maddubs_epi16; the deletion continues with the remainder of that function. ...]
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_maddubs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); - int16x8_t bi = vmovl_s8(b_.neon_i8); - int16x8_t p = vmulq_s16(ai, bi); - int16x4_t l = vget_low_s16(p); - int16x4_t h = vget_high_s16(p); - r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int idx = HEDLEY_STATIC_CAST(int, i) << 1; - int32_t ts = - (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + - (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); - r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_mulhrs_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), - vget_low_s16(b_.neon_i16)); - int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), - vget_high_s16(b_.neon_i16)); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); - int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); - - /* Join together */ - r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); - v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); - const v128_t __inc = wasm_i32x4_splat(0x4000); - __lo = wasm_i32x4_add(__lo, __inc); - __hi = wasm_i32x4_add(__hi, __inc); - __lo = wasm_i32x4_add(__lo, __lo); - __hi = wasm_i32x4_add(__hi, __hi); - r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhrs_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - 
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow = vrshrn_n_s32(mul, 15); - - /* Join together */ - r_.neon_i16 = narrow; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); - uint8x16_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s8(b_.neon_i8); - #else - bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); - #endif - bnz_mask = vmvnq_u8(bnz_mask); - - r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); - simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); - uint16x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s16(b_.neon_i16); - #else - bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); - #endif - bnz_mask = vmvnq_u16(bnz_mask); - - r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); - simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); - uint32x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s32(b_.neon_i32); - #else - bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); - #endif - bnz_mask = vmvnq_u32(bnz_mask); - - r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); - simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); - uint8x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s8(b_.neon_i8); - #else - bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); - #endif - bnz_mask = vmvn_u8(bnz_mask); - - r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); - uint16x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s16(b_.neon_i16); - #else - bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); - #endif - bnz_mask = vmvn_u16(bnz_mask); - - r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); - uint32x2_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s32(b_.neon_i32); - #else - bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); - #endif - bnz_mask = vmvn_u32(bnz_mask); - - r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/ssse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) -# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_epi16(a, b, imm8) \ - (__extension__ ({ \ - simde__m128i_private \ - simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ - simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ - simde_mm_blend_epi16_r_; \ - \ - simde_mm_blend_epi16_r_.i16 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 16, 16, \ - simde_mm_blend_epi16_a_.i16, \ - simde_mm_blend_epi16_b_.i16, \ - ((imm8) & (1 << 0)) ? 8 : 0, \ - ((imm8) & (1 << 1)) ? 9 : 1, \ - ((imm8) & (1 << 2)) ? 10 : 2, \ - ((imm8) & (1 << 3)) ? 11 : 3, \ - ((imm8) & (1 << 4)) ? 12 : 4, \ - ((imm8) & (1 << 5)) ? 13 : 5, \ - ((imm8) & (1 << 6)) ? 14 : 6, \ - ((imm8) & (1 << 7)) ? 15 : 7 \ - ); \ - \ - simde__m128i_from_private(simde_mm_blend_epi16_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_epi16 - #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; - } - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_pd(a, b, imm8) \ - (__extension__ ({ \ - simde__m128d_private \ - simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ - simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ - simde_mm_blend_pd_r_; \ - \ - simde_mm_blend_pd_r_.f64 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 64, 16, \ - simde_mm_blend_pd_a_.f64, \ - simde_mm_blend_pd_b_.f64, \ - ((imm8) & (1 << 0)) ? 2 : 0, \ - ((imm8) & (1 << 1)) ? 3 : 1 \ - ); \ - \ - simde__m128d_from_private(simde_mm_blend_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_pd - #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_ps(a, b, imm8) \ - (__extension__ ({ \ - simde__m128_private \ - simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ - simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ - simde_mm_blend_ps_r_; \ - \ - simde_mm_blend_ps_r_.f32 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 32, 16, \ - simde_mm_blend_ps_a_.f32, \ - simde_mm_blend_ps_b_.f32, \ - ((imm8) & (1 << 0)) ? 4 : 0, \ - ((imm8) & (1 << 1)) ? 5 : 1, \ - ((imm8) & (1 << 2)) ? 6 : 2, \ - ((imm8) & (1 << 3)) ? 
7 : 3 \ - ); \ - \ - simde__m128_from_private(simde_mm_blend_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_ps - #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_epi8(a, b, mask); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); - return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Use a signed shift right to create a mask with the sign bit */ - mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); - r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); - #else - mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; - #endif - - r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int8_t m = mask_.i8[i] >> 7; - r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_epi8 - #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE2_NATIVE) - mask = simde_mm_srai_epi16(mask, 15); - return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); - r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i16 = mask_.i16 < z; - #else - mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; - #endif - - r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int16_t m = mask_.i16[i] >> 15; - r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; - mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); - #else - mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; - #endif - - r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - int32_t m = mask_.i32[i] >> 31; - r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i64) z = { 0, 0 }; - mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); - #else - mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; - #endif - - r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - int64_t m = mask_.i64[i] >> 63; - r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_pd - #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_ps - #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_pd (simde__m128d a, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - /* For architectures which lack a current direction SIMD instruction. */ - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndiq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyint) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_nearbyint(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndaq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_roundeven) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_roundeven(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndmq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_floor(a_.f64[i]); - } - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndpq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); - #elif defined(simde_math_ceil) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_ceil(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_trunc(a_.f64[i]); - } - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_pd - #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_pd - #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ps - #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_sd - #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_ss(a, 
b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ss - #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cmpeq_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ - uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); - uint32x4_t swapped = vrev64q_u32(cmp); - r_.neon_u32 = vandq_u32(cmp, swapped); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpeq_epi64 - #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_i16 = s16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, 0, -1, 1, -1, 2, -1, 3, - -1, 4, -1, 5, -1, 6, -1, 7)); - r_.i16 >>= 8; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi16 - #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) 
- __m128i tmp = _mm_unpacklo_epi8(a, a); - tmp = _mm_unpacklo_epi16(tmp, tmp); - return _mm_srai_epi32(tmp, 24); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ - r_.neon_i32 = s32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, 0, -1, -1, -1, 1, - -1, -1, -1, 2, -1, -1, -1, 3)); - r_.i32 >>= 24; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi32 - #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); - r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - /* Disabled on x86 due to lack of 64-bit arithmetic shift until - * until AVX-512 (at which point we would be using the native - * _mm_cvtepi_epi64 anyways). 
*/ - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, -1, -1, -1, -1, 0, - -1, -1, -1, -1, -1, -1, -1, 1)); - r_.i64 >>= 56; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi64 - #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_u16 = u16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 16, 1, 17, 2, 18, 3, 19, - 4, 20, 5, 21, 6, 22, 7, 23)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi16 - #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi32(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ - r_.neon_u32 = u32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 
18, 19, 1, 21, 22, 23, - 2, 25, 26, 27, 3, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi32 - #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi64(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 18, 19, 20, 21, 22, 23, - 1, 25, 26, 27, 28, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi64 - #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); - r_.i32 >>= 16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi32 - #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 1, 11, 2, 13, 3, 15)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi32 - #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 10, 11, - 1, 13, 14, 15)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi64 - #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, - 8, 9, 10, 0, - 12, 13, 14, 1)); - r_.i64 >>= 48; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi64 - #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i tmp = _mm_shuffle_epi32(a, 0x50); - tmp = _mm_srai_epi32(tmp, 31); - tmp = _mm_shuffle_epi32(tmp, 0xed); - return _mm_unpacklo_epi32(a, tmp); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); - r_.i64 >>= 32; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi32_epi64 - #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); - #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u32) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu32_epi64 - #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - - switch (imm8) { - case 0xff: - r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); - break; - case 0x13: - r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); - break; - default: - { /* imm8 is a compile-time constant, so this all becomes just a load */ - uint64_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - - r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); - - { - uint64_t mask_data[] = { - (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - break; - } - #else - simde_float64 sum = SIMDE_FLOAT64_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; - } - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_pd - #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - - switch (imm8) { - case 0xff: - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - case 0x7f: - r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - break; - default: - { - { - uint32_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - - r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); - - { - uint32_t mask_data[] = { - (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), - (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) - }; - r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); - } - } - break; - } - #else - simde_float32 sum = SIMDE_FLOAT32_C(0.0); - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); - } - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(HEDLEY_MCST_LCC_VERSION) - #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ - SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ - _mm_dp_ps((a), (b), (imm8)); \ - SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ - })) - #else - #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_dp_ps - #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) -#endif - -#if defined(simde_mm_extract_epi8) -# undef simde_mm_extract_epi8 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int8_t -simde_mm_extract_epi8 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i8, imm8); - #else - return a_.i8[imm8 & 15]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) -# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi8 - #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) -#endif - -#if defined(simde_mm_extract_epi32) -# undef simde_mm_extract_epi32 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i32, imm8); - #else - return a_.i32[imm8 & 3]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_epi32 - #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) -#endif - -#if defined(simde_mm_extract_epi64) -# undef simde_mm_extract_epi64 -#endif -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_extract_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - (void) imm8; - #endif - return vec_extract(a_.altivec_i64, imm8); - #else - return a_.i64[imm8 & 1]; - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) -#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_extract_epi64 - #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) -#endif - -#if defined(simde_mm_extract_ps) -# undef simde_mm_extract_ps -#endif -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_extract_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128_private - a_ = simde__m128_to_private(a); - - return a_.i32[imm8 & 3]; -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_extract_ps - #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_pd - #define _mm_floor_pd(a) simde_mm_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ps - #define _mm_floor_ps(a) simde_mm_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_floor_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_floor) - r_.f64[0] = simde_math_floor(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_sd - #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_floor_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_floor_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_floor_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_floorf) - r_.f32[0] = simde_math_floorf(b_.f32[0]); - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i]; - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return 
simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_floor_ss - #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - /* clang-3.8 returns an incompatible type, so we need the cast. MSVC - * can't handle the cast ("error C2440: 'type cast': cannot convert - * from '__m128i' to '__m128i'"). */ - #if defined(__clang__) - #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) - #else - #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi8 - #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #if defined(__clang__) - #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) - #else - #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) - #endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_epi32 - #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - #if defined(SIMDE_BUG_GCC_94482) - simde__m128i_private - a_ = simde__m128i_to_private(a); - - switch(imm8) { - case 0: - return simde_mm_set_epi64x(a_.i64[1], i); - break; - case 1: - return simde_mm_set_epi64x(i, a_.i64[0]); - break; - default: - HEDLEY_UNREACHABLE(); - break; - } - #else - simde__m128i_private - r_ = simde__m128i_to_private(a); - - r_.i64[imm8] = i; - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) -# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm_insert_epi64 - #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - float tmp1_ = b_.f32[(imm8 >> 6) & 3]; - a_.f32[(imm8 >> 4) & 3] = tmp1_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_insert_ps - #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi8(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi8 - #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_max_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi32(a, b); - return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epi32 - #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_add_epi16(b, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu16 - #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_max_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_max_epu32 - #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi8 - #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) - return _mm_min_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epi32 - #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu16(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ - return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu16 - #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_min_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_min_epu32 - #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_minpos_epu16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_minpos_epu16(a); - #else - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()), - a_ = simde__m128i_to_private(a); - - r_.u16[0] = UINT16_MAX; - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - if (a_.u16[i] < r_.u16[0]) { - r_.u16[0] = a_.u16[i]; - r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); - } - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_minpos_epu16 - #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - const int a_offset = imm8 & 4; - const int b_offset = (imm8 & 3) << 2; - -#if defined(simde_math_abs) - for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { - r_.u16[i] = - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + - HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) -# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mpsadbw_epu8 - #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mul_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // vmull_s32 upcasts instead of masking, so we downcast. 
- int32x2_t a_lo = vmovn_s64(a_.neon_i64); - int32x2_t b_lo = vmovn_s64(b_.neon_i64); - r_.neon_i64 = vmull_s32(a_lo, b_lo); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make( - wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), - wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = - HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * - HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mul_epi32 - #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_mullo_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void) a_; - (void) b_; - r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_mullo_epi32 - #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 * b_.u32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] * b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_packus_epi32(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i max = _mm_set1_epi32(UINT16_MAX); - const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); - const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); - return - _mm_packs_epi32( - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), - _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) - ); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #if defined(SIMDE_BUG_CLANG_46840) - r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); - #else - r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); - #endif - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = - 
vcombine_u16( - vqmovun_s32(a_.neon_i32), - vqmovun_s32(b_.neon_i32) - ); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); - - v &= ~(v >> 31); - v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); - - SIMDE_CONVERT_VECTOR_(r_.i16, v); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; - r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_packus_epi32 - #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyint) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f64[0] = simde_math_nearbyint(b_.f64[0]); - break; - #endif - - #if defined(simde_math_floor) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f64[0] = simde_math_floor(b_.f64[0]); - break; - #endif - - #if defined(simde_math_ceil) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f64[0] = simde_math_ceil(b_.f64[0]); - break; - #endif - - #if defined(simde_math_trunc) - case SIMDE_MM_FROUND_TO_ZERO: - r_.f64[0] = simde_math_trunc(b_.f64[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) -# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) -#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_sd - #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128_private - r_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyintf) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - case SIMDE_MM_FROUND_CUR_DIRECTION: - r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_floorf) - case SIMDE_MM_FROUND_TO_NEG_INF: - r_.f32[0] = simde_math_floorf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_ceilf) - case SIMDE_MM_FROUND_TO_POS_INF: - r_.f32[0] = simde_math_ceilf(b_.f32[0]); - break; - #endif - - #if defined(simde_math_truncf) - case SIMDE_MM_FROUND_TO_ZERO: - 
r_.f32[0] = simde_math_truncf(b_.f32[0]); - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) -#elif SIMDE_NATURAL_VECTOR_SIZE > 0 - #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_ss - #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_load) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - return __builtin_nontemporal_load(mem_addr); - #else - return simde_mm_load_si128(mem_addr); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_stream_load_si128 - #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_ones (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_ones(a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; - #else - int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(&:r_) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r_ &= a_.i32f[i]; - } - - r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_ones - #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_all_zeros(a, mask); - #else - simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); - int r; - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; - #else - int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(|:r_) - for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { - r_ |= tmp_.i32f[i]; - } - - r = !r_; - #endif - - return r; - #endif -} 
-#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_all_zeros - #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_test_mix_ones_zeros(a, mask); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); - int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); - return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); - long long c0 = wasm_i64x2_extract_lane(m, 0); - long long c1 = wasm_i64x2_extract_lane(m, 1); - long long ones = c0 | c1; - long long zeros = ~(c0 & c1); - return ones && zeros; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) - if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) - return 1; - - return 0; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_test_mix_ones_zeros - #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #else - int_fast32_t r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r |= ~a_.i32f[i] & b_.i32f[i]; - } - - return HEDLEY_STATIC_CAST(int, !r); - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testc_si128 - #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testnzc_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); - int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); - return !( !(vgetq_lane_s64(s641, 0) || vgetq_lane_s64(s641, 1)) \ - || !(vgetq_lane_s64(s640, 0) || vgetq_lane_s64(s640, 1)) ); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ - && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) - return 1; - } - - return 0; - #endif - #endif -} -#if 
defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testnzc_si128 - #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_testz_si128(a, b); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); - return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; - #elif defined(SIMDE_HAVE_INT128_) - if ((a_.u128[0] & b_.u128[0]) == 0) { - return 1; - } - return 0; - #else - for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { - if ((a_.u64[i] & b_.u64[i]) > 0) - return 0; - } - #endif - - return 1; - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_testz_si128 - #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_1_H) */ -/* :: End simde/x86/sse4.1.h :: */ - -#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS - #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS - #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS - #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS - #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY - #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES - #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH - #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED - #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY - #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY - #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT - #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT - #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK - #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK -#else - #define SIMDE_SIDD_UBYTE_OPS 0x00 - #define SIMDE_SIDD_UWORD_OPS 0x01 - #define SIMDE_SIDD_SBYTE_OPS 0x02 - #define SIMDE_SIDD_SWORD_OPS 0x03 - #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 - #define SIMDE_SIDD_CMP_RANGES 0x04 - #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 - #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c - #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 - #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 - #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 - #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 - #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 - #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 - #define SIMDE_SIDD_BIT_MASK 0x00 - #define SIMDE_SIDD_UNIT_MASK 0x40 -#endif - -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) - #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS - #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS - #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS - #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS - #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY - #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES - #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH - #define 
_SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED - #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY - #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY - #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY - #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY - #define _SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT - #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT - #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK - #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ - _mm_cmpestrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrs - #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - #if !defined(HEDLEY_PGI_VERSION) - /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ - (void) a; - (void) b; - #endif - (void) la; - (void) lb; - return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 
16 : 8)) - 1); -} -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ - _mm_cmpestrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ - imm8) - #else - #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpestrz - #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_cmpgt_epi64(a, b); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://stackoverflow.com/a/65175746/501126 */ - __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); - r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); - return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* https://stackoverflow.com/a/65223269/501126 */ - r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpgt_epi64 - #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_8_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 8) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i8[i]) - a_invalid = 1; - } - return a_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrs_16_(simde__m128i a) { - simde__m128i_private a_= simde__m128i_to_private(a); - const int upper_bound = (128 / 16) - 1; - int a_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!a_.i16[i]) - a_invalid = 1; - } - return a_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrs(a, b, imm8) \ - _mm_cmpistrs( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrs(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? 
simde_mm_cmpistrs_16_((a)) \ - : simde_mm_cmpistrs_8_((a))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrs - #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_8_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 8) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i8[i]) - b_invalid = 1; - } - return b_invalid; -} - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_cmpistrz_16_(simde__m128i b) { - simde__m128i_private b_= simde__m128i_to_private(b); - const int upper_bound = (128 / 16) - 1; - int b_invalid = 0; - SIMDE_VECTORIZE - for (int i = 0 ; i <= upper_bound ; i++) { - if(!b_.i16[i]) - b_invalid = 1; - } - return b_invalid; -} - -#if defined(SIMDE_X86_SSE4_2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_cmpistrz(a, b, imm8) \ - _mm_cmpistrz( \ - HEDLEY_REINTERPRET_CAST(__v16qi, a), \ - HEDLEY_REINTERPRET_CAST(__v16qi, b), \ - imm8) - #else - #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) - #endif -#else - #define simde_mm_cmpistrz(a, b, imm8) \ - (((imm8) & SIMDE_SIDD_UWORD_OPS) \ - ? simde_mm_cmpistrz_16_((b)) \ - : simde_mm_cmpistrz_8_((b))) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpistrz - #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u8(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cb(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc ^= v; - for(int bit = 0 ; bit < 8 ; bit++) { - if (crc & 1) - crc = (crc >> 1) ^ UINT32_C(0x82f63b78); - else - crc = (crc >> 1); - } - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u16(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32ch(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u8(crc, v & 0xff); - crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_crc32_u32(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return __crc32cw(prevcrc, v); - #else - uint32_t crc = prevcrc; - crc = simde_mm_crc32_u16(crc, v & 0xffff); - crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint64_t -simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { - #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) - return _mm_crc32_u64(prevcrc, v); - #else - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) - return 
__crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); - #else - uint64_t crc = prevcrc; - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); - crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); - return crc; - #endif - #endif -} -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE4_2_H) */ -/* :: End simde/x86/sse4.2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; - SIMDE_ALIGN_TO_32 simde__m128 m128[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256 n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} 
simde__m256_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; - SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256d n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256d_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_uint128 u128 
SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_32 simde_float16 f16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 simde_float16 f16[16]; - #endif - SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_32 int8_t i8[32]; - SIMDE_ALIGN_TO_32 int16_t i16[16]; - SIMDE_ALIGN_TO_32 int32_t i32[8]; - SIMDE_ALIGN_TO_32 int64_t i64[4]; - SIMDE_ALIGN_TO_32 uint8_t u8[32]; - SIMDE_ALIGN_TO_32 uint16_t u16[16]; - SIMDE_ALIGN_TO_32 uint32_t u32[8]; - SIMDE_ALIGN_TO_32 uint64_t u64[4]; - SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_32 simde_int128 i128[2]; - SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; - #endif - SIMDE_ALIGN_TO_32 simde_float16 f16[16]; - SIMDE_ALIGN_TO_32 simde_float32 f32[8]; - SIMDE_ALIGN_TO_32 simde_float64 f64[4]; - #endif - - SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; - SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; - - #if defined(SIMDE_X86_AVX_NATIVE) - SIMDE_ALIGN_TO_32 __m256i n; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; - #endif - #endif -} simde__m256i_private; - -#if defined(SIMDE_X86_AVX_NATIVE) - typedef __m256 simde__m256; - typedef __m256i simde__m256i; - typedef __m256d simde__m256d; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; -#else - typedef simde__m256_private simde__m256; - typedef simde__m256i_private simde__m256i; - typedef simde__m256d_private simde__m256d; -#endif - -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) - typedef simde__m256 __m256; - typedef simde__m256i __m256i; - typedef simde__m256d __m256d; - #else - #undef __m256 - #define __m256 simde__m256 - #undef __m256i - #define __m256i simde__m256i - #undef __m256d - #define __m256d simde__m256d - #endif -#endif - -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); 
-HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); -HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde__m256_from_private(simde__m256_private v) { - simde__m256 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256_private -simde__m256_to_private(simde__m256 v) { - simde__m256_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde__m256i_from_private(simde__m256i_private v) { - simde__m256i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i_private -simde__m256i_to_private(simde__m256i v) { - simde__m256i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde__m256d_from_private(simde__m256d_private v) { - simde__m256d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d_private -simde__m256d_to_private(simde__m256d v) { - simde__m256d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_CMP_EQ_OQ 0 -#define SIMDE_CMP_LT_OS 1 -#define SIMDE_CMP_LE_OS 2 -#define SIMDE_CMP_UNORD_Q 3 -#define SIMDE_CMP_NEQ_UQ 4 -#define SIMDE_CMP_NLT_US 5 -#define SIMDE_CMP_NLE_US 6 -#define SIMDE_CMP_ORD_Q 7 -#define SIMDE_CMP_EQ_UQ 8 -#define SIMDE_CMP_NGE_US 9 -#define SIMDE_CMP_NGT_US 10 -#define SIMDE_CMP_FALSE_OQ 11 -#define SIMDE_CMP_NEQ_OQ 12 -#define SIMDE_CMP_GE_OS 13 -#define SIMDE_CMP_GT_OS 14 -#define SIMDE_CMP_TRUE_UQ 15 -#define SIMDE_CMP_EQ_OS 16 -#define SIMDE_CMP_LT_OQ 17 -#define SIMDE_CMP_LE_OQ 18 -#define SIMDE_CMP_UNORD_S 19 -#define SIMDE_CMP_NEQ_US 20 -#define SIMDE_CMP_NLT_UQ 21 -#define SIMDE_CMP_NLE_UQ 22 -#define SIMDE_CMP_ORD_S 23 -#define SIMDE_CMP_EQ_US 24 -#define SIMDE_CMP_NGE_UQ 25 -#define SIMDE_CMP_NGT_UQ 26 -#define SIMDE_CMP_FALSE_OS 27 -#define SIMDE_CMP_NEQ_OS 28 -#define SIMDE_CMP_GE_OQ 29 -#define SIMDE_CMP_GT_OQ 30 -#define SIMDE_CMP_TRUE_US 31 - -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) -#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ -#define _CMP_LT_OS SIMDE_CMP_LT_OS -#define _CMP_LE_OS SIMDE_CMP_LE_OS -#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q -#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ -#define _CMP_NLT_US SIMDE_CMP_NLT_US -#define _CMP_NLE_US SIMDE_CMP_NLE_US -#define _CMP_ORD_Q SIMDE_CMP_ORD_Q -#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ -#define _CMP_NGE_US SIMDE_CMP_NGE_US -#define _CMP_NGT_US SIMDE_CMP_NGT_US -#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ -#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ -#define _CMP_GE_OS SIMDE_CMP_GE_OS -#define _CMP_GT_OS SIMDE_CMP_GT_OS -#define _CMP_TRUE_UQ 
SIMDE_CMP_TRUE_UQ -#define _CMP_EQ_OS SIMDE_CMP_EQ_OS -#define _CMP_LT_OQ SIMDE_CMP_LT_OQ -#define _CMP_LE_OQ SIMDE_CMP_LE_OQ -#define _CMP_UNORD_S SIMDE_CMP_UNORD_S -#define _CMP_NEQ_US SIMDE_CMP_NEQ_US -#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ -#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ -#define _CMP_ORD_S SIMDE_CMP_ORD_S -#define _CMP_EQ_US SIMDE_CMP_EQ_US -#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ -#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ -#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS -#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS -#define _CMP_GE_OQ SIMDE_CMP_GE_OQ -#define _CMP_GT_OQ SIMDE_CMP_GT_OQ -#define _CMP_TRUE_US SIMDE_CMP_TRUE_US -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castps_pd (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps_pd(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps_pd - #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castps_si256 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps_si256(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps_si256 - #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castsi256_pd (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_pd(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_pd - #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castsi256_ps (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_ps(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_ps - #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castpd_ps (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd_ps(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd_ps - #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castpd_si256 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd_si256(a); - #else - return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd_si256 - #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setzero_si256 (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_si256(); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_setzero_si128(); - r_.m128i[1] = simde_mm_setzero_si128(); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = 0; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_si256 - #define _mm256_setzero_si256() simde_mm256_setzero_si256() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 
-simde_mm256_setzero_ps (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_ps(); - #else - return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_ps - #define _mm256_setzero_ps() simde_mm256_setzero_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setzero_pd (void) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setzero_pd(); - #else - return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setzero_pd - #define _mm256_setzero_pd() simde_mm256_setzero_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_not_ps(simde__m256 a) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); - r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm256_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_ps(a, b, mask); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - mask_ = simde__m256_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); - r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_not_pd(simde__m256d a) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = ~a_.i64; - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); - r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ~(a_.i64[i]); - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm256_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. 
- * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_pd(a, b, mask); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - mask_ = simde__m256d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); - r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_setone_si256 (void) { - simde__m256i_private r_; - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - __typeof__(r_.i32f) rv = { 0, }; - r_.i32f = ~rv; -#elif defined(SIMDE_X86_AVX2_NATIVE) - __m256i t = _mm256_setzero_si256(); - r_.n = _mm256_cmpeq_epi32(t, t); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - } -#endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_setone_ps (void) { - return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_setone_pd (void) { - return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, - int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, - int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi8( - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16); - #else - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - r_.i8[16] = e16; - r_.i8[17] = e17; - r_.i8[18] = e18; - r_.i8[19] = e19; - r_.i8[20] = e20; - r_.i8[21] = e21; - r_.i8[22] = e22; - r_.i8[23] = e23; - r_.i8[24] = e24; - r_.i8[25] = e25; - r_.i8[26] = e26; - r_.i8[27] = e27; - r_.i8[28] = e28; - r_.i8[29] = e29; - r_.i8[30] = e30; - r_.i8[31] = e31; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi8 - #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ 
- simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, - int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); - #else - r_.i16[ 0] = e0; - r_.i16[ 1] = e1; - r_.i16[ 2] = e2; - r_.i16[ 3] = e3; - r_.i16[ 4] = e4; - r_.i16[ 5] = e5; - r_.i16[ 6] = e6; - r_.i16[ 7] = e7; - r_.i16[ 8] = e8; - r_.i16[ 9] = e9; - r_.i16[10] = e10; - r_.i16[11] = e11; - r_.i16[12] = e12; - r_.i16[13] = e13; - r_.i16[14] = e14; - r_.i16[15] = e15; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi16 - #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, - int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); - #else - r_.i32[ 0] = e0; - r_.i32[ 1] = e1; - r_.i32[ 2] = e2; - r_.i32[ 3] = e3; - r_.i32[ 4] = e4; - r_.i32[ 5] = e5; - r_.i32[ 6] = e6; - r_.i32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi32 - #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi64x(e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi64x(e1, e0); - r_.m128i[1] = simde_mm_set_epi64x(e3, e2); - #else - r_.i64[0] = e0; - r_.i64[1] = e1; - r_.i64[2] = e2; - r_.i64[3] = e3; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi64x - #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, - uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, - uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, - uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, - uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, - uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, - uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m256i_private r_; - - r_.u8[ 0] = e0; - 
r_.u8[ 1] = e1; - r_.u8[ 2] = e2; - r_.u8[ 3] = e3; - r_.u8[ 4] = e4; - r_.u8[ 5] = e5; - r_.u8[ 6] = e6; - r_.u8[ 7] = e7; - r_.u8[ 8] = e8; - r_.u8[ 9] = e9; - r_.u8[10] = e10; - r_.u8[11] = e11; - r_.u8[12] = e12; - r_.u8[13] = e13; - r_.u8[14] = e14; - r_.u8[15] = e15; - r_.u8[16] = e16; - r_.u8[17] = e17; - r_.u8[18] = e18; - r_.u8[19] = e19; - r_.u8[20] = e20; - r_.u8[20] = e20; - r_.u8[21] = e21; - r_.u8[22] = e22; - r_.u8[23] = e23; - r_.u8[24] = e24; - r_.u8[25] = e25; - r_.u8[26] = e26; - r_.u8[27] = e27; - r_.u8[28] = e28; - r_.u8[29] = e29; - r_.u8[30] = e30; - r_.u8[31] = e31; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, - uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, - uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, - uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m256i_private r_; - - r_.u16[ 0] = e0; - r_.u16[ 1] = e1; - r_.u16[ 2] = e2; - r_.u16[ 3] = e3; - r_.u16[ 4] = e4; - r_.u16[ 5] = e5; - r_.u16[ 6] = e6; - r_.u16[ 7] = e7; - r_.u16[ 8] = e8; - r_.u16[ 9] = e9; - r_.u16[10] = e10; - r_.u16[11] = e11; - r_.u16[12] = e12; - r_.u16[13] = e13; - r_.u16[14] = e14; - r_.u16[15] = e15; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, - uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), - HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); - #else - r_.u32[ 0] = e0; - r_.u32[ 1] = e1; - r_.u32[ 2] = e2; - r_.u32[ 3] = e3; - r_.u32[ 4] = e4; - r_.u32[ 5] = e5; - r_.u32[ 6] = e6; - r_.u32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { - simde__m256i_private r_; - - r_.u64[0] = e0; - r_.u64[1] = e1; - r_.u64[2] = e2; - r_.u64[3] = e3; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); - r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - r_.f32[4] = e4; - r_.f32[5] = e5; - r_.f32[6] = e6; - r_.f32[7] = e7; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_ps - #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ - 
simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_pd(e3, e2, e1, e0); - #else - simde__m256d_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_set_pd(e1, e0); - r_.m128d[1] = simde_mm_set_pd(e3, e2); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - r_.f64[2] = e2; - r_.f64[3] = e3; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_pd - #define _mm256_set_pd(e3, e2, e1, e0) \ - simde_mm256_set_pd(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); - #else - simde__m256_private r_; - simde__m128_private - e1_ = simde__m128_to_private(e1), - e0_ = simde__m128_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128_private[0] = e0_; - r_.m128_private[1] = e1_; - #elif defined(SIMDE_HAVE_INT128_) - r_.i128[0] = e0_.i128[0]; - r_.i128[1] = e1_.i128[0]; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128 - #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); - #else - simde__m256d_private r_; - simde__m128d_private - e1_ = simde__m128d_to_private(e1), - e0_ = simde__m128d_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d_private[0] = e0_; - r_.m128d_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128d - #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); - #else - simde__m256i_private r_; - simde__m128i_private - e1_ = simde__m128i_to_private(e1), - e0_ = simde__m128i_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i_private[0] = e0_; - r_.m128i_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128i - #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi8(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi8(a); - r_.m128i[1] = simde_mm_set1_epi8(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - #endif - - return 
simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi8 - #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi16 (int16_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi16(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi16(a); - r_.m128i[1] = simde_mm_set1_epi16(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi16 - #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi32 (int32_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi32(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi32(a); - r_.m128i[1] = simde_mm_set1_epi32(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi32 - #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi64x (int64_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi64x(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi64x(a); - r_.m128i[1] = simde_mm_set1_epi64x(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi64x - #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set1_ps (simde_float32 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_ps(a); - #else - simde__m256_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_set1_ps(a); - r_.m128[1] = simde_mm_set1_ps(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_ps - #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_pd(a); - #else - simde__m256d_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_set1_pd(a); - r_.m128d[1] = simde_mm_set1_pd(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_pd - #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i16[i] = a_.i16[2 * i]; - r_.i16[i + quarter_point] = b_.i16[2 * i]; - r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; - r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i16[i] = a_.i16[2 * i + 1]; - r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; - r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; - r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i32[i] = a_.i32[2 * i]; - r_.i32[i + quarter_point] = b_.i32[2 * i]; - r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; - r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - const size_t quarter_point = (sizeof(r_.i32) / 
sizeof(r_.i32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i32[i] = a_.i32[2 * i + 1]; - r_.i32[i + quarter_point] = b_.i32[2 * i + 1]; - r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; - r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f32[i] = a_.f32[2 * i]; - r_.f32[i + quarter_point] = b_.f32[2 * i]; - r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; - r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f32[i] = a_.f32[2 * i + 1]; - r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; - r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; - r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f64[i] = a_.f64[2 * i]; - r_.f64[i + quarter_point] = b_.f64[2 * i]; - r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; - r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f64[i] = a_.f64[2 * i + 1]; - r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; - r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; - r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_abs_ps(simde__m256 a) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_abs_pd(simde__m256d a) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_add_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_add_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_ps - #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hadd_ps(a, b); - #else - return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_ps - #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_add_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_add_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] + b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_pd - #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hadd_pd(a, b); - #else - return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_pd - #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_addsub_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; - r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_addsub_ps - #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_addsub_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; - r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_addsub_pd - #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_and_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_and_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_ps - #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_and_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_and_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; 
i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_pd - #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_andnot_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_ps - #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_andnot_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_pd - #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ - simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_ps - #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; - } - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_pd(a, b, imm8) \ - simde_mm256_set_m128d( \ - simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ - simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_pd - #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_ps(a, b, mask); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - mask_ = simde__m256_to_private(mask); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); - r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blendv_ps - #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_pd(a, b, mask); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - mask_ = simde__m256d_to_private(mask); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); - r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blendv_pd - #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_pd(mem_addr); - #else - simde__m256d_private r_; - - simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); - r_.m128d[0] = tmp; - r_.m128d[1] = tmp; - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_pd - #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_ps(mem_addr); - #else - simde__m256_private r_; - - simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); - r_.m128[0] = tmp; - r_.m128[1] = tmp; - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_ps - #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcast_sd (simde_float64 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_sd(a); - #else - return simde_mm256_set1_pd(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_sd - #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_broadcast_ss (simde_float32 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_broadcast_ss(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); - #else - return simde_mm_set1_ps(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcast_ss - #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcast_ss (simde_float32 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_ss(a); - #else - return simde_mm256_set1_ps(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_ss - #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castpd128_pd256 (simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd128_pd256(a); - #else - simde__m256d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - r_.m128d_private[0] = a_; - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd128_pd256 - #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm256_castpd256_pd128 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd256_pd128(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - return a_.m128d[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd256_pd128 - #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castps128_ps256 (simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps128_ps256(a); - #else - simde__m256_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - r_.m128_private[0] = a_; - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps128_ps256 - #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_castps256_ps128 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps256_ps128(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - return a_.m128[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps256_ps128 - #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castsi128_si256 (simde__m128i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi128_si256(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - r_.m128i_private[0] = a_; - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi128_si256 - #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_castsi256_si128 (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_si128(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_si128 - #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_round_ps (simde__m256 a, const int rounding) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyintf) - case SIMDE_MM_FROUND_CUR_DIRECTION: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_roundf) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_floorf) - case SIMDE_MM_FROUND_TO_NEG_INF: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_ceilf) - case SIMDE_MM_FROUND_TO_POS_INF: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_truncf) - case SIMDE_MM_FROUND_TO_ZERO: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256_private \ - 
[... deletion of the vendored SIMDE AVX portability shims continues; the original diff lists every removed line, and the line breaks of this region were lost in extraction, so the elided hunks are summarized here rather than reproduced. They remove the remainder of the simde_mm256_round_ps fallback macro and the shims for round_pd, ceil/floor, the _mm_cmp_{pd,ps,sd,ss} and _mm256_cmp_{pd,ps} comparison families, the x_mm256_copysign helpers, the cvt*/cvtt* conversions, div_{ps,pd}, extractf128/insertf128, insert_epi*/extract_epi*, dp_ps, the lddqu/load/loadu and loadu2_m128* loads, maskload_{pd,ps} at both widths, and maskstore_pd at both widths. Every shim follows the same shape: dispatch to the native _mm/_mm256 intrinsic when SIMDE_X86_AVX_NATIVE is defined, otherwise fall back to a scalar or two-by-128-bit lane loop; a minimal illustration of that pattern follows, after which the deleted _mm_maskstore_ps shim resumes mid-definition. ...]
wasm_f32x4_extract_lane(a_.wasm_v128, 2); - if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) - mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.f32[i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskstore_ps - #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); - #else - _mm256_maskstore_ps(mem_addr, mask, a); - #endif - #else - simde__m256i_private mask_ = simde__m256i_to_private(mask); - simde__m256_private a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - if (mask_.u32[i] & (UINT32_C(1) << 31)) - mem_addr[i] = a_.f32[i]; - } - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskstore_ps - #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_min_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_min_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_ps - #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_min_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_min_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_min_pd - #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_max_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_max_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_ps - #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_max_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_max_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_max_pd - #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_movedup_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movedup_pd(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movedup_pd - #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_movehdup_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movehdup_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movehdup_ps - #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_moveldup_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_moveldup_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); - #else - SIMDE_VECTORIZE 
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_moveldup_ps - #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_ps(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r |= (a_.u32[i] >> 31) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_ps - #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_movemask_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_movemask_pd(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - int r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r |= (a_.u64[i] >> 63) << i; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_movemask_pd - #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_ps - #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_mul_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_mul_pd - #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_or_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; 
- #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_ps - #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_or_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_or_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] | b_.u64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_pd - #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute_ps (simde__m256 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_ps - #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute_pd - #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permute_ps (simde__m128 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permute_ps - #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permute_pd (simde__m128d a, const int imm8) - 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permute_pd - #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_permutevar_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make( - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), - (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[b_.i32[i] & 3]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permutevar_ps - #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_permutevar_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make( - (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), - (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_permutevar_pd - #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_permutevar_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - simde__m256i_private b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar_ps - #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_permutevar_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - simde__m256i_private b_ = 
simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar_pd - #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); - r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_ps - #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); - r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_pd - #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); - r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2f128_si256 - #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_rcp_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rcp_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); - r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_rcp_ps - #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_rsqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_rsqrt_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_rsqrt_ps - #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi8 ( - int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi8( - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi8( - e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15, - e16, e17, e18, e19, e20, e21, e22, e23, - e24, e25, e26, e27, e28, e29, e30, e31); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi8 - #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi16 ( - int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi16( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi16( - e0, e1, e2, e3, e4, e5, e6, e7, - 
e8, e9, e10, e11, e12, e13, e14, e15); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi16 - #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi32 ( - int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi32 - #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_epi64x(e3, e2, e1, e0); - #else - return simde_mm256_set_epi64x(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_epi64x - #define _mm256_setr_epi64x(e3, e2, e1, e0) \ - simde_mm256_setr_epi64x(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_setr_ps ( - simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_ps - #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_setr_pd(e3, e2, e1, e0); - #else - return simde_mm256_set_pd(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_pd - #define _mm256_setr_pd(e3, e2, e1, e0) \ - simde_mm256_setr_pd(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return _mm256_setr_m128(lo, hi); - #else - return simde_mm256_set_m128(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128 - #define _mm256_setr_m128(lo, hi) \ - simde_mm256_setr_m128(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return _mm256_setr_m128d(lo, hi); - #else - return simde_mm256_set_m128d(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128d - #define _mm256_setr_m128d(lo, hi) \ - simde_mm256_setr_m128d(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { - #if defined(SIMDE_X86_AVX_NATIVE) && \ - !defined(SIMDE_BUG_GCC_REV_247851) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) - return 
_mm256_setr_m128i(lo, hi); - #else - return simde_mm256_set_m128i(hi, lo); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_setr_m128i - #define _mm256_setr_m128i(lo, hi) \ - simde_mm256_setr_m128i(lo, hi) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; - r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; - r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; - r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; - r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_shuffle_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ - simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm256_shuffle_ps(a, b, imm8) \ - SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ - (((imm8) >> 0) & 3) + 0, \ - (((imm8) >> 2) & 3) + 0, \ - (((imm8) >> 4) & 3) + 8, \ - (((imm8) >> 6) & 3) + 8, \ - (((imm8) >> 0) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 12, \ - (((imm8) >> 6) & 3) + 12) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_ps - #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - r_.f64[0] = a_.f64[((imm8 ) & 1) ]; - r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; - r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; - r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_shuffle_pd(a, b, imm8) \ - simde_mm256_set_m128d( \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 2) & 3), \ - simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 0) & 3)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm256_shuffle_pd(a, b, imm8) \ - SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ - (((imm8) >> 0) & 1) + 0, \ - (((imm8) >> 1) & 1) + 4, \ - (((imm8) >> 2) & 1) + 2, \ - (((imm8) >> 3) & 1) + 6) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_pd - #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sqrt_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); - r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sqrt_ps - #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sqrt_pd (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sqrt_pd(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); - r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sqrt_pd - #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_ps(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_ps - #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_pd(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_pd - #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_store_si256(mem_addr, a); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_store_si256 - #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_ps(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_ps - #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_pd(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_pd - #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { - #if 
defined(SIMDE_X86_AVX_NATIVE) - _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu_si256 - #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128(hi_addr, lo_addr, a); - #else - simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); - simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128 - #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128d(hi_addr, lo_addr, a); - #else - simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); - simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128d - #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - _mm256_storeu2_m128i(hi_addr, lo_addr, a); - #else - simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); - simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_storeu2_m128i - #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_ps - #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_pd - #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - _mm256_stream_si256(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_stream_si256 - #define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_ps - #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_ps(a, b); - #else - return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_ps - #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sub_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_pd - #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hsub_pd(a, b); - #else - return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_pd - #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_undefined_ps (void) { - simde__m256_private r_; - -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || 
HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_ps(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256_to_private(simde_mm256_setzero_ps()); -#endif - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_ps - #define _mm256_undefined_ps() simde_mm256_undefined_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_undefined_pd (void) { - simde__m256d_private r_; - -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_pd(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); -#endif - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_pd - #define _mm256_undefined_pd() simde_mm256_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_undefined_si256 (void) { - simde__m256i_private r_; -#if \ - defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ - (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) - r_.n = _mm256_undefined_si256(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_undefined_si256 - #define _mm256_undefined_si256() simde_mm256_undefined_si256() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_xor_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_ps - #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_xor_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] ^ b_.u64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_pd - #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_xorsign_ps(simde__m256 dest, 
simde__m256 src) { - return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { - return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_negate_ps(simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_negate_pd(simde__m256d a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpackhi_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - r_.f32[4] = a_.f32[6]; - r_.f32[5] = b_.f32[6]; - r_.f32[6] = a_.f32[7]; - r_.f32[7] = b_.f32[7]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_ps - #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpackhi_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); - #else - r_.f64[0] = a_.f64[1]; - r_.f64[1] = b_.f64[1]; - r_.f64[2] = a_.f64[3]; - r_.f64[3] = b_.f64[3]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_pd - #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_unpacklo_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - r_.f32[4] = a_.f32[4]; - r_.f32[5] = b_.f32[4]; - r_.f32[6] = a_.f32[5]; - r_.f32[7] = b_.f32[5]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if 
[Vendored SIMDE headers deleted. The deletion hunks in this range remove, line by line, the auto-generated third-party SIMD polyfills:
 the tail of simde/x86/avx.h (the _mm256_unpacklo_ps/_mm256_unpacklo_pd, zextps128_ps256/zextpd128_pd256/zextsi128_si256, and _mm_testc/_mm_testz/_mm_testnzc portable fallbacks, through the closing `#endif /* !defined(SIMDE_X86_AVX_H) */`);
 the whole of simde/x86/fma.h (the fmadd, fmaddsub, fmsub, fmsubadd, fnmadd, and fnmsub families for __m128/__m128d/__m256/__m256d, with their NEON, AltiVec, and scalar fallbacks);
 and the opening of simde/x86/avx2.h (MIT license header plus the abs, add, hadd, adds, avg, blend, blendv, broadcast, bslli, bsrli, cmpeq, and cmpgt implementations).
 The simde/x86/avx2.h hunk continues below; the full bodies of these deleted files are omitted here for brevity.]
~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi64 - #define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi16 - #define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi32 - #define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi64 - #define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi16_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi16_epi32 - #define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi16_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi16_epi64 - #define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi32_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi32_epi64 - #define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi16 - #define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi32 - #define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u8[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi64 - #define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu16_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu16_epi32 - #define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu16_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if 
defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu16_epi64 - #define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu32_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu32_epi64 - #define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_extract_epi8 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 31){ - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i8[index]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi8(a, index) _mm256_extract_epi8(a, index) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi8 - #define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_extract_epi16 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 15) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i16[index]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi16(a, index) _mm256_extract_epi16(a, index) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi16 - #define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_extracti128_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[imm8]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extracti128_si256 - #define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i32gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return 
simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i32gather_epi32(base_addr, vindex, scale) _mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_epi32 - #define _mm_i32gather_epi32(base_addr, vindex, scale) simde_mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i32gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { - r_.i32[i] = src_.i32[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_epi32 - #define _mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_i32gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i32gather_epi32(base_addr, vindex, scale) _mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_epi32 - #define _mm256_i32gather_epi32(base_addr, vindex, scale) simde_mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_i32gather_epi32(simde__m256i src, const int32_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 
&& !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - src_ = simde__m256i_to_private(src), - mask_ = simde__m256i_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { - r_.i32[i] = src_.i32[i]; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i32gather_epi32 - #define _mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i64gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i64gather_epi32(base_addr, vindex, scale) _mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i64gather_epi32 - #define _mm_i64gather_epi32(base_addr, vindex, scale) simde_mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { 
- r_.i32[i] = src_.i32[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i64gather_epi32 - #define _mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_i64gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128i_private - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i64gather_epi32(base_addr, vindex, scale) _mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i64gather_epi32 - #define _mm256_i64gather_epi32(base_addr, vindex, scale) simde_mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m256i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128i_private - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { - r_.i32[i] = src_.i32[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i64gather_epi32 - #define _mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), 
vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_epi64 - #define _mm_i32gather_epi64(base_addr, vindex, scale) simde_mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i32gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_epi64 - #define _mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - 
simde__m256i_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_epi64 - #define _mm256_i32gather_epi64(base_addr, vindex, scale) simde_mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_i32gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m128i vindex, simde__m256i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - src_ = simde__m256i_to_private(src), - mask_ = simde__m256i_to_private(mask), - r_; - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i32gather_epi64 - #define _mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i64gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , 
vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i64gather_epi64 - #define _mm_i64gather_epi64(base_addr, vindex, scale) simde_mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i64gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i64gather_epi64 - #define _mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_i64gather_epi64(const int64_t* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i64[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define 
simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) - #else - #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i64gather_epi64 - #define _mm256_i64gather_epi64(base_addr, vindex, scale) simde_mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_i64gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - src_ = simde__m256i_to_private(src), - mask_ = simde__m256i_to_private(mask), - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i64[i] >> 63) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int64_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i64[i] = dst; - } - else { - r_.i64[i] = src_.i64[i]; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) - #else - #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) - #endif -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i64gather_epi64 - #define _mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_i32gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i32gather_ps(base_addr, vindex, scale) _mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_ps - #define _mm_i32gather_ps(base_addr, vindex, scale) 
simde_mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_i32gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128_private - src_ = simde__m128_to_private(src), - mask_ = simde__m128_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f32[i] = dst; - } - else { - r_.f32[i] = src_.f32[i]; - } - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_ps - #define _mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_i32gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m256_private - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i32gather_ps(base_addr, vindex, scale) _mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, (base_addr)), (vindex), (scale)) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_ps - #define _mm256_i32gather_ps(base_addr, vindex, scale) simde_mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, (base_addr)), (vindex), (scale)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_i32gather_ps(simde__m256 src, const simde_float32* base_addr, simde__m256i vindex, simde__m256 mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m256_private - src_ = simde__m256_to_private(src), - mask_ = simde__m256_to_private(mask), - r_; - const uint8_t* addr = 
HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f32[i] = dst; - } - else { - r_.f32[i] = src_.f32[i]; - } - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i32gather_ps - #define _mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_i64gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128_private - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i64gather_ps(base_addr, vindex, scale) _mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i64gather_ps - #define _mm_i64gather_ps(base_addr, vindex, scale) simde_mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex); - simde__m128_private - src_ = simde__m128_to_private(src), - mask_ = simde__m128_to_private(mask), - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f32[i] = dst; - } - else { - r_.f32[i] = src_.f32[i]; - } - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) 
_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, float32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i64gather_ps - #define _mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_i64gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128_private - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.f32[i] = dst; - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i64gather_ps(base_addr, vindex, scale) _mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i64gather_ps - #define _mm256_i64gather_ps(base_addr, vindex, scale) simde_mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m256i vindex, simde__m128 mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex); - simde__m128_private - src_ = simde__m128_to_private(src), - mask_ = simde__m128_to_private(mask), - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - simde_float32 dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.f32[i] = dst; - } - else { - r_.f32[i] = src_.f32[i]; - } - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_i64gather_ps - #define _mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) - 
- [... deletion of the vendored SIMDE AVX2 header (simde/x86/avx2.h), continued. This stretch removes the portable fallbacks for the AVX2 gather intrinsics (simde_mm_i32gather_pd, simde_mm_i64gather_pd, simde_mm256_i32gather_pd, simde_mm256_i64gather_pd, and their masked variants), inserti128_si256, madd_epi16, maddubs_epi16, maskload/maskstore (epi32/epi64, 128- and 256-bit), max/min (epi8/epi16/epi32 and epu8/epu16/epu32), movemask_epi8, mpsadbw_epu8, mul_epi32/epu32, mulhi_epi16/epu16, mulhrs_epi16, mullo_epi16/epi32, or_si256, packs/packus (epi16/epi32), permute2x128_si256, permute4x64_epi64/pd, permutevar8x32_epi32/ps, sad_epu8, shuffle_epi8/epi32, shufflehi/shufflelo_epi16, sign_epi8/epi16/epi32, and the sll/slli/sllv shift family; the deletion continues below ...]
(a_.u64[i] << b_.u64[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_sllv_epi64(a, b) _mm256_sllv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sllv_epi64 - #define _mm256_sllv_epi64(a, b) simde_mm256_sllv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sra_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sra_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sra_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sra_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sra_epi16 - #define _mm256_sra_epi16(a, count) simde_mm256_sra_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sra_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sra_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sra_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sra_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - - if (shift > 31) shift = 31; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sra_epi32 - #define _mm256_sra_epi32(a, count) simde_mm256_sra_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srai_epi16 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srai_epi16(a, imm8) _mm256_srai_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srai_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srai_epi16 - #define _mm256_srai_epi16(a, imm8) simde_mm256_srai_epi16(a, imm8) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srai_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); - - if (shift > 31) shift = 31; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srai_epi32(a, imm8) _mm256_srai_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srai_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srai_epi32 - #define _mm256_srai_epi32(a, imm8) simde_mm256_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srav_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_srav_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t cnt = vreinterpretq_s32_u32(vminq_u32(count_.neon_u32, vdupq_n_u32(31))); - r_.neon_i32 = vshlq_s32(a_.neon_i32, vnegq_s32(cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); - r_.i32[i] = a_.i32[i] >> HEDLEY_STATIC_CAST(int, shift > 31 ? 31 : shift); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_srav_epi32 - #define _mm_srav_epi32(a, count) simde_mm_srav_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srav_epi32 (simde__m256i a, simde__m256i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srav_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - count_ = simde__m256i_to_private(count); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srav_epi32(a_.m128i[0], count_.m128i[0]); - r_.m128i[1] = simde_mm_srav_epi32(a_.m128i[1], count_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); - if (shift > 31) shift = 31; - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srav_epi32 - #define _mm256_srav_epi32(a, count) simde_mm256_srav_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 16 ? 
16 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi16 - #define _mm256_srl_epi16(a, count) simde_mm256_srl_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 32 ? 32 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi32 - #define _mm256_srl_epi32(a, count) simde_mm256_srl_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi64 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi64(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi64(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi64(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 64 ? 
64 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(64, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi64 - #define _mm256_srl_epi64(a, count) simde_mm256_srl_epi64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_epi16 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - if (imm8 > 15) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_u16) / sizeof(a_.altivec_u16[0])) ; i++) { - r_.altivec_u16[i] = vec_sr(a_.altivec_u16[i], sv); - } - #else - if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) { - simde_memset(&r_, 0, sizeof(r_)); - } else { - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> imm8; - } - #endif - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_epi16(a, imm8) _mm256_srli_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srli_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_epi16 - #define _mm256_srli_epi16(a, imm8) simde_mm256_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_u32) / sizeof(a_.altivec_u32[0])) ; i++) { - r_.altivec_u32[i] = vec_sr(a_.altivec_u32[i], sv); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> imm8; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srli_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_epi32 - #define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_epi64 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - 
r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, imm8); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srli_epi64(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_epi64 - #define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srli_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { - const int e = imm8 + HEDLEY_STATIC_CAST(int, i); - r_.m128i_private[h].i8[i] = (e < 16) ? a_.m128i_private[h].i8[e] : 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) -# define simde_mm256_srli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm256_srli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srli_si256 - #define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srlv_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_srlv_epi32(a, b) _mm_srlv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_srlv_epi32 - #define _mm_srlv_epi32(a, b) simde_mm_srlv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srlv_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] >> b_.u32[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_srlv_epi32(a, b) _mm256_srlv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srlv_epi32 - #define _mm256_srlv_epi32(a, b) simde_mm256_srlv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srlv_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_srlv_epi64(a, b) _mm_srlv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_srlv_epi64 - #define _mm_srlv_epi64(a, b) simde_mm_srlv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srlv_epi64 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_srlv_epi64(a, b) _mm256_srlv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srlv_epi64 - #define _mm256_srlv_epi64(a, b) simde_mm256_srlv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_stream_load_si256 (const simde__m256i* mem_addr) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_stream_load_si256(HEDLEY_CONST_CAST(simde__m256i*, mem_addr)); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT) - return __builtin_nontemporal_load(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_stream_load_si256(mem_addr) simde_mm256_stream_load_si256(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi8 - #define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) { - #if 
defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi16 - #define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hsub_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hsub_epi16(a, b); - #else - return simde_mm256_sub_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_epi16 - #define _mm256_hsub_epi16(a, b) simde_mm256_hsub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi32 - #define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hsub_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hsub_epi32(a, b); - #else - return simde_mm256_sub_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsub_epi32 - #define _mm256_hsub_epi32(a, b) simde_mm256_hsub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sub_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sub_epi64 - #define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_sub_epu32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ 
= simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_sub_epu32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_sub_epu32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_subs_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epi8 - #define _mm256_subs_epi8(a, b) simde_mm256_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epi16(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_subs_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epi16 - #define _mm256_subs_epi16(a, b) simde_mm256_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hsubs_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hsubs_epi16(a, b); - #else - return simde_mm256_subs_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hsubs_epi16 - #define _mm256_hsubs_epi16(a, b) simde_mm256_hsubs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_subs_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epu8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epu8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epu8 - #define _mm256_subs_epu8(a, b) simde_mm256_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_subs_epu16(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - 
return _mm256_subs_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_subs_epu16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_subs_epu16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_subs_epu16 - #define _mm256_subs_epu16(a, b) simde_mm256_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_x_mm256_test_all_ones (simde__m256i a) { - simde__m256i_private a_ = simde__m256i_to_private(a); - int r; - int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - - SIMDE_VECTORIZE_REDUCTION(&:r_) - for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { - r_ &= a_.i32f[i]; - } - - r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); - - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, - 0, 32, 1, 33, 2, 34, 3, 35, - 4, 36, 5, 37, 6, 38, 7, 39, - 16, 48, 17, 49, 18, 50, 19, 51, - 20, 52, 21, 53, 22, 54, 23, 55); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { - r_.i8[2 * i] = a_.i8[i + ~(~i | 7)]; - r_.i8[2 * i + 1] = b_.i8[i + ~(~i | 7)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi8 - #define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, - 0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { - r_.i16[2 * i] = a_.i16[i + ~(~i | 3)]; - r_.i16[2 * i + 1] = b_.i16[i + ~(~i | 3)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi16 - #define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if 
SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, - 0, 8, 1, 9, 4, 12, 5, 13); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { - r_.i32[2 * i] = a_.i32[i + ~(~i | 1)]; - r_.i32[2 * i + 1] = b_.i32[i + ~(~i | 1)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi32 - #define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpacklo_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 0, 4, 2, 6); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { - r_.i64[2 * i] = a_.i64[2 * i]; - r_.i64[2 * i + 1] = b_.i64[2 * i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpacklo_epi64 - #define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, - 8, 40, 9, 41, 10, 42, 11, 43, - 12, 44, 13, 45, 14, 46, 15, 47, - 24, 56, 25, 57, 26, 58, 27, 59, - 28, 60, 29, 61, 30, 62, 31, 63); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { - r_.i8[2 * i] = a_.i8[i + 8 + ~(~i | 7)]; - r_.i8[2 * i + 1] = b_.i8[i + 8 + ~(~i | 7)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_epi8 - #define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, - 4, 20, 5, 21, 6, 22, 7, 23, - 12, 28, 13, 29, 14, 30, 15, 31); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { - r_.i16[2 * i] = a_.i16[i 
+ 4 + ~(~i | 3)]; - r_.i16[2 * i + 1] = b_.i16[i + 4 + ~(~i | 3)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_epi16 - #define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, - 2, 10, 3, 11, 6, 14, 7, 15); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { - r_.i32[2 * i] = a_.i32[i + 2 + ~(~i | 1)]; - r_.i32[2 * i + 1] = b_.i32[i + 2 + ~(~i | 1)]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_epi32 - #define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_unpackhi_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 1, 5, 3, 7); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { - r_.i64[2 * i] = a_.i64[2 * i + 1]; - r_.i64[2 * i + 1] = b_.i64[2 * i + 1]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_unpackhi_epi64 - #define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_xor_si256(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ b_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_xor_si256 - #define _mm256_xor_si256(a, b) simde_mm256_xor_si256(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX2_H) */ -/* :: End simde/x86/avx2.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/abs.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy 
of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_ABS_H) -#define SIMDE_X86_AVX512_ABS_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/types.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_TYPES_H) -#define SIMDE_X86_AVX512_TYPES_H -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* The problem is that Microsoft doesn't support 64-byte aligned parameters, except for - * __m512/__m512i/__m512d. Since our private union has an __m512 member it will be 64-byte - * aligned even if we reduce the alignment requirements of other members. - * - * Even if we're on x86 and use the native AVX-512 types for arguments/return values, the - * to/from private functions will break, and I'm not willing to change their APIs to use - * pointers (which would also require more verbose code on the caller side) just to make - * MSVC happy. - * - * If you want to use AVX-512 in SIMDe, you'll need to either upgrade to MSVC 2017 or later, - * or upgrade to a different compiler (clang-cl, perhaps?). 
If you have an idea of how to - * fix this without requiring API changes (except transparently through macros), patches - * are welcome. - */ - -# if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0) -# if defined(SIMDE_X86_AVX512F_NATIVE) -# undef SIMDE_X86_AVX512F_NATIVE -# pragma message("Native AVX-512 support requires MSVC 2017 or later. See comment above (in code) for details.") -# endif -# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_32 -# else -# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_64 -# endif - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_AVX512BF16_NATIVE) - SIMDE_ALIGN_TO_16 __m128bh n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 
[Vendored SIMDE sources removed along with the dependency -- this span of the diff deletes, verbatim, the remainder of simde/x86/avx512/types.h (the simde__m128bh/m256bh/m512bh/m512/m512d/m512h/m512i private unions, the simde__m512*/simde__mmask* typedefs and native-alias fallbacks, the size/alignment static asserts, the SIMDE_MM_CMPINT_* constants, and the *_from_private/*_to_private conversion helpers) and the opening of simde/x86/avx512/mov.h together with its bundled cast.h, load.h, and set.h (simde_mm512_cast*, simde_mm512_load_*, simde_mm512_set_*, simde_x_mm512_set_*, and the simde_mm_mask_mov_* fallbacks). No project code is touched in this span; the deletion of the vendored headers continues below.]
a_.m128i[0]); - r_.m128i[1] = simde_mm_mask_mov_epi8(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_epi8 - #define _mm256_mask_mov_epi8(src, k, a) simde_mm256_mask_mov_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_mov_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_epi16(src, k, a); - #else - simde__m256i_private - src_ = simde__m256i_to_private(src), - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_mask_mov_epi16(src_.m128i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); - r_.m128i[1] = simde_mm_mask_mov_epi16(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : src_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_epi16 - #define _mm256_mask_mov_epi16(src, k, a) simde_mm256_mask_mov_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_mov_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_epi32(src, k, a); - #else - simde__m256i_private - src_ = simde__m256i_to_private(src), - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_mask_mov_epi32(src_.m128i[0], k , a_.m128i[0]); - r_.m128i[1] = simde_mm_mask_mov_epi32(src_.m128i[1], k >> 4, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_epi32 - #define _mm256_mask_mov_epi32(src, k, a) simde_mm256_mask_mov_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_mov_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_epi64(src, k, a); - #else - simde__m256i_private - src_ = simde__m256i_to_private(src), - a_ = simde__m256i_to_private(a), - r_; - - /* N.B. CM: This fallback may not be faster as there are only four elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_mask_mov_epi64(src_.m128i[0], k , a_.m128i[0]); - r_.m128i[1] = simde_mm_mask_mov_epi64(src_.m128i[1], k >> 2, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_epi64 - #define _mm256_mask_mov_epi64(src, k, a) simde_mm256_mask_mov_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_mov_pd (simde__m256d src, simde__mmask8 k, simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_pd(src, k, a); - #else - return simde_mm256_castsi256_pd(simde_mm256_mask_mov_epi64(simde_mm256_castpd_si256(src), k, simde_mm256_castpd_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_pd - #define _mm256_mask_mov_pd(src, k, a) simde_mm256_mask_mov_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_mov_ps (simde__m256 src, simde__mmask8 k, simde__m256 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_ps(src, k, a); - #else - return simde_mm256_castsi256_ps(simde_mm256_mask_mov_epi32(simde_mm256_castps_si256(src), k, simde_mm256_castps_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_ps - #define _mm256_mask_mov_ps(src, k, a) simde_mm256_mask_mov_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_mov_epi8(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi8(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi8(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi8 - #define _mm512_mask_mov_epi8(src, k, a) simde_mm512_mask_mov_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_mov_epi16(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi16(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi16(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : src_.i16[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi16 - #define _mm512_mask_mov_epi16(src, k, a) simde_mm512_mask_mov_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_epi32(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi32(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi32(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi32 - #define _mm512_mask_mov_epi32(src, k, a) simde_mm512_mask_mov_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_epi64(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi64(src_.m256i[0], k , a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi64(src_.m256i[1], k >> 4, a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi64 - #define _mm512_mask_mov_epi64(src, k, a) simde_mm512_mask_mov_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_mov_pd (simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_pd(src, k, a); - #else - return simde_mm512_castsi512_pd(simde_mm512_mask_mov_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_pd - #define _mm512_mask_mov_pd(src, k, a) simde_mm512_mask_mov_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_mov_ps (simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_ps(src, k, a); - #else - return simde_mm512_castsi512_ps(simde_mm512_mask_mov_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_ps - #define _mm512_mask_mov_ps(src, k, a) simde_mm512_mask_mov_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_x_mm512_mask_mov_ph (simde__m512h src, simde__mmask32 k, simde__m512h a) { - return simde_mm512_castsi512_ph(simde_mm512_mask_mov_epi16(simde_mm512_castph_si512(src), k, simde_mm512_castph_si512(a))); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi8 (simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi8(k, a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi8 - #define _mm_maskz_mov_epi8(k, a) simde_mm_maskz_mov_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi16 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi16(k, a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi16 - #define _mm_maskz_mov_epi16(k, a) simde_mm_maskz_mov_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi32 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi32(k, a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? 
a_.i32[i] : INT32_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi32 - #define _mm_maskz_mov_epi32(k, a) simde_mm_maskz_mov_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi64 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi64(k, a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - /* N.B. CM: No fallbacks as there are only two elements */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : INT64_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi64 - #define _mm_maskz_mov_epi64(k, a) simde_mm_maskz_mov_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_maskz_mov_pd (simde__mmask8 k, simde__m128d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_pd(k, a); - #else - return simde_mm_castsi128_pd(simde_mm_maskz_mov_epi64(k, simde_mm_castpd_si128(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_pd - #define _mm_maskz_mov_pd(k, a) simde_mm_maskz_mov_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskz_mov_ps (simde__mmask8 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_ps(k, a); - #else - return simde_mm_castsi128_ps(simde_mm_maskz_mov_epi32(k, simde_mm_castps_si128(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_ps - #define _mm_maskz_mov_ps(k, a) simde_mm_maskz_mov_ps(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi8 (simde__mmask32 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi8(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? 
a_.i8[i] : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi8 - #define _mm256_maskz_mov_epi8(k, a) simde_mm256_maskz_mov_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi16 (simde__mmask16 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi16(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi16 - #define _mm256_maskz_mov_epi16(k, a) simde_mm256_maskz_mov_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi32 (simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi32(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi32(k , a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi32(k >> 4, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi32 - #define _mm256_maskz_mov_epi32(k, a) simde_mm256_maskz_mov_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi64 (simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi64(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - /* N.B. CM: This fallback may not be faster as there are only four elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi64(k , a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi64(k >> 2, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi64 - #define _mm256_maskz_mov_epi64(k, a) simde_mm256_maskz_mov_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskz_mov_pd (simde__mmask8 k, simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_pd(k, a); - #else - return simde_mm256_castsi256_pd(simde_mm256_maskz_mov_epi64(k, simde_mm256_castpd_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_pd - #define _mm256_maskz_mov_pd(k, a) simde_mm256_maskz_mov_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskz_mov_ps (simde__mmask8 k, simde__m256 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_ps(k, a); - #else - return simde_mm256_castsi256_ps(simde_mm256_maskz_mov_epi32(k, simde_mm256_castps_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_ps - #define _mm256_maskz_mov_ps(k, a) simde_mm256_maskz_mov_ps(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi8 (simde__mmask64 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_mov_epi8(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); - #else - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi8 - #define _mm512_maskz_mov_epi8(k, a) simde_mm512_maskz_mov_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi16 (simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_mov_epi16(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : INT16_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi16 - #define _mm512_maskz_mov_epi16(k, a) simde_mm512_maskz_mov_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi32 (simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_epi32(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi32 - #define _mm512_maskz_mov_epi32(k, a) simde_mm512_maskz_mov_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi64 (simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_epi64(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi64(k , a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi64(k >> 4, a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi64 - #define _mm512_maskz_mov_epi64(k, a) simde_mm512_maskz_mov_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_mov_pd (simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_pd(k, a); - #else - return simde_mm512_castsi512_pd(simde_mm512_maskz_mov_epi64(k, simde_mm512_castpd_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_pd - #define _mm512_maskz_mov_pd(k, a) simde_mm512_maskz_mov_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_mov_ps (simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_ps(k, a); - #else - return simde_mm512_castsi512_ps(simde_mm512_maskz_mov_epi32(k, simde_mm512_castps_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_ps - #define _mm512_maskz_mov_ps(k, a) simde_mm512_maskz_mov_ps(k, a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MOV_H) */ -/* :: End simde/x86/avx512/mov.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_abs_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_abs_epi8(src, k, a); - #else - return simde_mm_mask_mov_epi8(src, k, simde_mm_abs_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_abs_epi8 - #define _mm_mask_abs_epi8(src, k, a) simde_mm_mask_abs_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_abs_epi8(simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_maskz_abs_epi8(k, a); - #else - return simde_mm_maskz_mov_epi8(k, simde_mm_abs_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_abs_epi8 - #define _mm_maskz_abs_epi8(k, a) simde_mm_maskz_abs_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_abs_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_abs_epi16(src, k, a); - #else - return simde_mm_mask_mov_epi16(src, k, simde_mm_abs_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_abs_epi16 - #define _mm_mask_abs_epi16(src, k, a) simde_mm_mask_abs_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_abs_epi16(simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_maskz_abs_epi16(k, a); - #else - return simde_mm_maskz_mov_epi16(k, simde_mm_abs_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_abs_epi16 - #define _mm_maskz_abs_epi16(k, a) simde_mm_maskz_abs_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_abs_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return 
_mm_mask_abs_epi32(src, k, a); - #else - return simde_mm_mask_mov_epi32(src, k, simde_mm_abs_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_abs_epi32 - #define _mm_mask_abs_epi32(src, k, a) simde_mm_mask_abs_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_abs_epi32(simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_abs_epi32(k, a); - #else - return simde_mm_maskz_mov_epi32(k, simde_mm_abs_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_abs_epi32 - #define _mm_maskz_abs_epi32(k, a) simde_mm_maskz_abs_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_abs_epi64(simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_abs_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const __m128i m = _mm_srai_epi32(_mm_shuffle_epi32(a, 0xF5), 31); - return _mm_sub_epi64(_mm_xor_si128(a, m), m); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i64 = vabsq_s64(a_.neon_i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64x2_t m = vshrq_n_s64(a_.neon_i64, 63); - r_.neon_i64 = vsubq_s64(veorq_s64(a_.neon_i64, m), m); - #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_abs(a_.altivec_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_abs(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - __typeof__(r_.i64) z = { 0, }; - __typeof__(r_.i64) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 < z); - r_.i64 = (-a_.i64 & m) | (a_.i64 & ~m); - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? -a_.i64[i] : a_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_abs_epi64 - #define _mm_abs_epi64(a) simde_mm_abs_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_abs_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_abs_epi64(src, k, a); - #else - return simde_mm_mask_mov_epi64(src, k, simde_mm_abs_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_abs_epi64 - #define _mm_mask_abs_epi64(src, k, a) simde_mm_mask_abs_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_abs_epi64(simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_abs_epi64(k, a); - #else - return simde_mm_maskz_mov_epi64(k, simde_mm_abs_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_abs_epi64 - #define _mm_maskz_abs_epi64(k, a) simde_mm_maskz_abs_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_abs_epi64(simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_abs_epi64(a); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_abs_epi64(a_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? 
-a_.i64[i] : a_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_abs_epi64 - #define _mm256_abs_epi64(a) simde_mm256_abs_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_abs_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_abs_epi64(src, k, a); - #else - return simde_mm256_mask_mov_epi64(src, k, simde_mm256_abs_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_abs_epi64 - #define _mm256_mask_abs_epi64(src, k, a) simde_mm256_mask_abs_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_abs_epi64(simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_abs_epi64(k, a); - #else - return simde_mm256_maskz_mov_epi64(k, simde_mm256_abs_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_abs_epi64 - #define _mm256_maskz_abs_epi64(k, a) simde_mm256_maskz_abs_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_abs_epi8 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_abs_epi8(a); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_abs_epi8(a_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_epi8 - #define _mm512_abs_epi8(a) simde_mm512_abs_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_abs_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_abs_epi8(src, k, a); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_abs_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_abs_epi8 - #define _mm512_mask_abs_epi8(src, k, a) simde_mm512_mask_abs_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_abs_epi8 (simde__mmask64 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_abs_epi8(k, a); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_abs_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_abs_epi8 - #define _mm512_maskz_abs_epi8(k, a) simde_mm512_maskz_abs_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_abs_epi16 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_abs_epi16(a); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_abs_epi16(a_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? 
-a_.i16[i] : a_.i16[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_epi16 - #define _mm512_abs_epi16(a) simde_mm512_abs_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_abs_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_abs_epi16(src, k, a); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_abs_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_abs_epi16 - #define _mm512_mask_abs_epi16(src, k, a) simde_mm512_mask_abs_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_abs_epi16 (simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_abs_epi16(k, a); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_abs_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_abs_epi16 - #define _mm512_maskz_abs_epi16(k, a) simde_mm512_maskz_abs_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_abs_epi32(simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_abs_epi32(a); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_abs_epi32(a_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (a_.i32[i] < INT64_C(0)) ? -a_.i32[i] : a_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_epi32 - #define _mm512_abs_epi32(a) simde_mm512_abs_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_abs_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_abs_epi32(src, k, a); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_abs_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_abs_epi32 - #define _mm512_mask_abs_epi32(src, k, a) simde_mm512_mask_abs_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_abs_epi32(simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_abs_epi32(k, a); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_abs_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_abs_epi32 - #define _mm512_maskz_abs_epi32(k, a) simde_mm512_maskz_abs_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_abs_epi64(simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_abs_epi64(a); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_abs_epi64(a_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? 
-a_.i64[i] : a_.i64[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_epi64 - #define _mm512_abs_epi64(a) simde_mm512_abs_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_abs_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_abs_epi64(src, k, a); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_abs_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_abs_epi64 - #define _mm512_mask_abs_epi64(src, k, a) simde_mm512_mask_abs_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_abs_epi64(simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_abs_epi64(k, a); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_abs_epi64(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_abs_epi64 - #define _mm512_maskz_abs_epi64(k, a) simde_mm512_maskz_abs_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_abs_ps(simde__m512 v2) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - return _mm512_abs_ps(v2); - #else - simde__m512_private - r_, - v2_ = simde__m512_to_private(v2); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].neon_f32 = vabsq_f32(v2_.m128_private[i].neon_f32); - } - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128_private[i].altivec_f32 = vec_abs(v2_.m128_private[i].altivec_f32); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = (v2_.f32[i] < INT64_C(0)) ? 
-v2_.f32[i] : v2_.f32[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_ps - #define _mm512_abs_ps(v2) simde_mm512_abs_ps(v2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_abs_ps(simde__m512 src, simde__mmask16 k, simde__m512 v2) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - return _mm512_mask_abs_ps(src, k, v2); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_abs_ps(v2)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_abs_ps - #define _mm512_mask_abs_ps(src, k, v2) simde_mm512_mask_abs_ps(src, k, v2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_abs_pd(simde__m512d v2) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_87467) - return _mm512_abs_pd(v2); - #elif defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - /* gcc bug: https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01962.html */ - return _mm512_abs_pd(_mm512_castpd_ps(v2)); - #else - simde__m512d_private - r_, - v2_ = simde__m512d_to_private(v2); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].neon_f64 = vabsq_f64(v2_.m128d_private[i].neon_f64); - } - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { - r_.m128d_private[i].altivec_f64 = vec_abs(v2_.m128d_private[i].altivec_f64); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = (v2_.f64[i] < INT64_C(0)) ? 
-v2_.f64[i] : v2_.f64[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_pd - #define _mm512_abs_pd(v2) simde_mm512_abs_pd(v2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_abs_pd(simde__m512d src, simde__mmask8 k, simde__m512d v2) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_87467) - return _mm512_mask_abs_pd(src, k, v2); - #elif defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - /* gcc bug: https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01962.html */ - return _mm512_mask_abs_pd(src, k, _mm512_castpd_ps(v2)); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_abs_pd(v2)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_abs_pd - #define _mm512_mask_abs_pd(src, k, v2) simde_mm512_mask_abs_pd(src, k, v2) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_ABS_H) */ -/* :: End simde/x86/avx512/abs.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/add.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_ADD_H) -#define SIMDE_X86_AVX512_ADD_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_add_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_add_epi8(src, k, a, b); - #else - return simde_mm_mask_mov_epi8(src, k, simde_mm_add_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_add_epi8 - #define _mm_mask_add_epi8(src, k, a, b) simde_mm_mask_add_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_add_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_maskz_add_epi8(k, a, b); - #else - return simde_mm_maskz_mov_epi8(k, simde_mm_add_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_add_epi8 - #define _mm_maskz_add_epi8(k, a, b) simde_mm_maskz_add_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_add_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_mask_add_epi16(src, k, a, b); - #else - return simde_mm_mask_mov_epi16(src, k, simde_mm_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_add_epi16 - #define _mm_mask_add_epi16(src, k, a, b) simde_mm_mask_add_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_add_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_maskz_add_epi16(k, a, b); - #else - return simde_mm_maskz_mov_epi16(k, simde_mm_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_add_epi16 - #define _mm_maskz_add_epi16(k, a, b) simde_mm_maskz_add_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_add_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_add_epi32(src, k, a, b); - #else - return simde_mm_mask_mov_epi32(src, k, simde_mm_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_add_epi32 - #define _mm_mask_add_epi32(src, k, a, b) simde_mm_mask_add_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_add_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_add_epi32(k, a, b); - #else - return simde_mm_maskz_mov_epi32(k, simde_mm_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_add_epi32 - #define _mm_maskz_add_epi32(k, a, b) simde_mm_maskz_add_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_add_epi64(simde__m128i src, simde__mmask8 k, 
simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_add_epi64(src, k, a, b); - #else - return simde_mm_mask_mov_epi64(src, k, simde_mm_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_add_epi64 - #define _mm_mask_add_epi64(src, k, a, b) simde_mm_mask_add_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_add_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_add_epi64(k, a, b); - #else - return simde_mm_maskz_mov_epi64(k, simde_mm_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_add_epi64 - #define _mm_maskz_add_epi64(k, a, b) simde_mm_maskz_add_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_add_ss(simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - return _mm_mask_add_ss(src, k, a, b); - #elif 1 - simde__m128_private - src_ = simde__m128_to_private(src), - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - r_ = simde__m128_to_private(a); - - r_.f32[0] = (k & 1) ? (a_.f32[0] + b_.f32[0]) : src_.f32[0]; - - return simde__m128_from_private(r_); - #else - return simde_mm_move_ss(a, simde_mm_mask_mov_ps(src, k, simde_mm_add_ps(a, b))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_add_ss - #define _mm_mask_add_ss(src, k, a, b) simde_mm_mask_add_ss(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskz_add_ss(simde__mmask8 k, simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) - return _mm_maskz_add_ss(k, a, b); - #elif 1 - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - r_ = simde__m128_to_private(a); - - r_.f32[0] = (k & 1) ? 
(a_.f32[0] + b_.f32[0]) : 0.0f; - - return simde__m128_from_private(r_); - #else - return simde_mm_move_ss(a, simde_mm_maskz_mov_ps(k, simde_mm_add_ps(a, b))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_add_ss - #define _mm_maskz_add_ss(k, a, b) simde_mm_maskz_add_ss(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_add_epi16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_mask_add_epi16(src, k, a, b); - #else - return simde_mm256_mask_mov_epi16(src, k, simde_mm256_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_add_epi16 - #define _mm256_mask_add_epi16(src, k, a, b) simde_mm256_mask_add_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_add_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_maskz_add_epi16(k, a, b); - #else - return simde_mm256_maskz_mov_epi16(k, simde_mm256_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_add_epi16 - #define _mm256_maskz_add_epi16(k, a, b) simde_mm256_maskz_add_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_add_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_add_epi32(src, k, a, b); - #else - return simde_mm256_mask_mov_epi32(src, k, simde_mm256_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_add_epi32 - #define _mm256_mask_add_epi32(src, k, a, b) simde_mm256_mask_add_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_add_epi32(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_add_epi32(k, a, b); - #else - return simde_mm256_maskz_mov_epi32(k, simde_mm256_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_add_epi32 - #define _mm256_maskz_add_epi32(k, a, b) simde_mm256_maskz_add_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_add_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_add_epi64(src, k, a, b); - #else - return simde_mm256_mask_mov_epi64(src, k, simde_mm256_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_add_epi64 - #define _mm256_mask_add_epi64(src, k, a, b) simde_mm256_mask_add_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_add_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_add_epi64(k, a, b); - #else - return simde_mm256_maskz_mov_epi64(k, simde_mm256_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_add_epi64 - #define _mm256_maskz_add_epi64(k, a, b) simde_mm256_maskz_add_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_add_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_add_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = 
simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi8(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_epi8 - #define _mm512_add_epi8(a, b) simde_mm512_add_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_add_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_add_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_add_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_epi8 - #define _mm512_mask_add_epi8(src, k, a, b) simde_mm512_mask_add_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_add_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_add_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_add_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_epi8 - #define _mm512_maskz_add_epi8(k, a, b) simde_mm512_maskz_add_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_add_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_add_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi16(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_epi16 - #define _mm512_add_epi16(a, b) simde_mm512_add_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_add_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_add_epi16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_epi16 - #define _mm512_mask_add_epi16(src, k, a, b) simde_mm512_mask_add_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_add_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_add_epi16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_epi16 - #define _mm512_maskz_add_epi16(k, a, b) simde_mm512_maskz_add_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_add_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - 
SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_epi32 - #define _mm512_add_epi32(a, b) simde_mm512_add_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_add_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_add_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_epi32 - #define _mm512_mask_add_epi32(src, k, a, b) simde_mm512_mask_add_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_add_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_add_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_epi32 - #define _mm512_maskz_add_epi32(k, a, b) simde_mm512_maskz_add_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_add_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) - r_.i64 = a_.i64 + b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_epi64 - #define _mm512_add_epi64(a, b) simde_mm512_add_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_add_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_add_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_epi64 - #define _mm512_mask_add_epi64(src, k, a, b) simde_mm512_mask_add_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_add_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_add_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_epi64 - #define _mm512_maskz_add_epi64(k, a, b) simde_mm512_maskz_add_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_add_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_add_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_add_ps(a_.m256[i], b_.m256[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_ps - #define _mm512_add_ps(a, b) simde_mm512_add_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_add_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_add_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_add_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_ps - #define _mm512_mask_add_ps(src, k, a, b) simde_mm512_mask_add_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_add_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_add_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_add_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_ps - #define _mm512_maskz_add_ps(k, a, b) simde_mm512_maskz_add_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_add_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_add_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_add_pd(a_.m256d[i], b_.m256d[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_pd - #define _mm512_add_pd(a, b) simde_mm512_add_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_add_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_add_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_add_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_pd - #define _mm512_mask_add_pd(src, k, a, b) simde_mm512_mask_add_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_add_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_add_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_add_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_pd - #define _mm512_maskz_add_pd(k, a, b) simde_mm512_maskz_add_pd(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_ADD_H) */ -/* :: End simde/x86/avx512/add.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/cmp.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to 
use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020-2021 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_CMP_H) -#define SIMDE_X86_AVX512_CMP_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/mov_mask.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_MOV_MASK_H) -#define SIMDE_X86_AVX512_MOV_MASK_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm_movepi8_mask (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_movepi8_mask(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movemask_epi8(a)); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__mmask16 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] < 0) ? (UINT64_C(1) << i) : 0; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_movepi8_mask - #define _mm_movepi8_mask(a) simde_mm_movepi8_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_movepi16_mask (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_movepi16_mask(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* There is no 32-bit _mm_movemask_* function, so we use - * _mm_movemask_epi8 then extract the odd bits. */ - uint_fast16_t r = HEDLEY_STATIC_CAST(uint_fast16_t, simde_mm_movemask_epi8(a)); - r = ( (r >> 1)) & UINT32_C(0x5555); - r = (r | (r >> 1)) & UINT32_C(0x3333); - r = (r | (r >> 2)) & UINT32_C(0x0f0f); - r = (r | (r >> 4)) & UINT32_C(0x00ff); - return HEDLEY_STATIC_CAST(simde__mmask8, r); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__mmask8 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm_movepi16_mask - #define _mm_movepi16_mask(a) simde_mm_movepi16_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_movepi32_mask (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm_movepi32_mask(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - return HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movemask_ps(simde_mm_castsi128_ps(a))); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__mmask8 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r |= (a_.i32[i] < 0) ? 
(UINT32_C(1) << i) : 0; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_movepi32_mask - #define _mm_movepi32_mask(a) simde_mm_movepi32_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm_movepi64_mask (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm_movepi64_mask(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movemask_pd(simde_mm_castsi128_pd(a))); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - simde__mmask8 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r |= (a_.i64[i] < 0) ? (UINT32_C(1) << i) : 0; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm_movepi64_mask - #define _mm_movepi64_mask(a) simde_mm_movepi64_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm256_movepi8_mask (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_movepi8_mask(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__mmask32 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask32, simde_mm_movepi8_mask(a_.m128i[i])) << (i * 16); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] < 0) ? (UINT64_C(1) << i) : 0; - } - #endif - - return HEDLEY_STATIC_CAST(simde__mmask32, r); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_movepi8_mask - #define _mm256_movepi8_mask(a) simde_mm256_movepi8_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm256_movepi16_mask (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_movepi16_mask(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__mmask16 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movepi16_mask(a_.m128i[i])) << (i * 8); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm256_movepi16_mask - #define _mm256_movepi16_mask(a) simde_mm256_movepi16_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_movepi32_mask (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm256_movepi32_mask(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__mmask8 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movepi32_mask(a_.m128i[i])) << (i * 4); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r |= (a_.i32[i] < 0) ? 
(UINT32_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm256_movepi32_mask - #define _mm256_movepi32_mask(a) simde_mm256_movepi32_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_movepi64_mask (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm256_movepi64_mask(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__mmask8 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movepi64_mask(a_.m128i[i])) << (i * 2); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r |= (a_.i64[i] < 0) ? (UINT32_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm256_movepi64_mask - #define _mm256_movepi64_mask(a) simde_mm256_movepi64_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_movepi8_mask (simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_movepi8_mask(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - simde__mmask64 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask64, simde_mm256_movepi8_mask(a_.m256i[i])) << (i * 32); - } - #else - r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] < 0) ? (UINT64_C(1) << i) : 0; - } - #endif - - return HEDLEY_STATIC_CAST(simde__mmask64, r); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_movepi8_mask - #define _mm512_movepi8_mask(a) simde_mm512_movepi8_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_movepi16_mask (simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_movepi16_mask(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - simde__mmask32 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask32, simde_mm256_movepi16_mask(a_.m256i[i])) << (i * 16); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_movepi16_mask - #define _mm512_movepi16_mask(a) simde_mm512_movepi16_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_movepi32_mask (simde__m512i a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_movepi32_mask(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - simde__mmask16 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm256_movepi32_mask(a_.m256i[i])) << (i * 8); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r |= (a_.i32[i] < 0) ? 
(UINT32_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_movepi32_mask - #define _mm512_movepi32_mask(a) simde_mm512_movepi32_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_movepi64_mask (simde__m512i a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_movepi64_mask(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - simde__mmask8 r = 0; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { - r |= simde_mm256_movepi64_mask(a_.m256i[i]) << (i * 4); - } - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r |= (a_.i64[i] < 0) ? (UINT32_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_movepi64_mask - #define _mm512_movepi64_mask(a) simde_mm512_movepi64_mask(a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_MOV_MASK_H) */ -/* :: End simde/x86/avx512/mov_mask.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/setzero.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512_SETZERO_H) -#define SIMDE_X86_AVX512_SETZERO_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_setzero_si512(void) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_setzero_si512(); - #else - simde__m512i r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#define simde_mm512_setzero_epi32() simde_mm512_setzero_si512() -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_si512 - #define _mm512_setzero_si512() simde_mm512_setzero_si512() - #undef _mm512_setzero_epi32 - #define _mm512_setzero_epi32() simde_mm512_setzero_si512() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_setzero_ps(void) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_setzero_ps(); - #else - return simde_mm512_castsi512_ps(simde_mm512_setzero_si512()); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_ps - #define _mm512_setzero_ps() simde_mm512_setzero_ps() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_setzero_pd(void) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_setzero_pd(); - #else - return simde_mm512_castsi512_pd(simde_mm512_setzero_si512()); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_pd - #define _mm512_setzero_pd() simde_mm512_setzero_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_mm512_setzero_ph(void) { - #if defined(SIMDE_X86_AVX512FP16_NATIVE) - return _mm512_setzero_ph(); - #else - return simde_mm512_castsi512_ph(simde_mm512_setzero_si512()); - #endif -} -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_ph - #define _mm512_setzero_ph() simde_mm512_setzero_ph() -#endif - - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SETZERO_H) */ -/* :: End simde/x86/avx512/setzero.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/setone.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512_SETONE_H) -#define SIMDE_X86_AVX512_SETONE_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_setone_si512(void) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - } - - return simde__m512i_from_private(r_); -} -#define simde_x_mm512_setone_epi32() simde_x_mm512_setone_si512() - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_x_mm512_setone_ps(void) { - return simde_mm512_castsi512_ps(simde_x_mm512_setone_si512()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_x_mm512_setone_pd(void) { - return simde_mm512_castsi512_pd(simde_x_mm512_setone_si512()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512h -simde_x_mm512_setone_ph(void) { - return simde_mm512_castsi512_ph(simde_x_mm512_setone_si512()); -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_SETONE_H) */ -/* :: End simde/x86/avx512/setone.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(__clang__) && SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 -SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmp_epi8_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 <= b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] <= b_.i8[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 != b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] != b_.i8[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), ~(a_.i8 < b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = !(a_.i8[i] < b_.i8[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), ~(a_.i8 <= b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = !(a_.i8[i] <= b_.i8[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi8_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_cmp_epi8_mask(a, b, imm8) _mm512_cmp_epi8_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epi8_mask - #define _mm512_cmp_epi8_mask(a, b, imm8) simde_mm512_cmp_epi8_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmp_epi32_mask (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 == b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 <= b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 != b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] != b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = !(a_.i32[i] < b_.i32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.i32 <= b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = !(a_.i32[i] <= b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m256i_to_private(simde_x_mm256_setone_si256()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm256_movepi32_mask(simde__m256i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_cmp_epi32_mask(a, b, imm8) _mm256_cmp_epi32_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_epi32_mask - #define _mm256_cmp_epi32_mask(a, b, imm8) simde_mm256_cmp_epi32_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmp_ps_mask (simde__m512 a, simde__m512 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m512_to_private(simde_mm512_setzero_ps()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m512_to_private(simde_x_mm512_setone_ps()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(r_))); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_cmp_ps_mask(a, b, imm8) _mm512_cmp_ps_mask((a), (b), (imm8)) -#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm512_cmp_ps_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512_private \ - simde_mm512_cmp_ps_mask_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ - simde_mm512_cmp_ps_mask_a_ = simde__m512_to_private((a)), \ - simde_mm512_cmp_ps_mask_b_ = simde__m512_to_private((b)); \ - \ - for (size_t i = 0 ; i < (sizeof(simde_mm512_cmp_ps_mask_r_.m128) / sizeof(simde_mm512_cmp_ps_mask_r_.m128[0])) ; i++) { \ - simde_mm512_cmp_ps_mask_r_.m128[i] = simde_mm_cmp_ps(simde_mm512_cmp_ps_mask_a_.m128[i], simde_mm512_cmp_ps_mask_b_.m128[i], (imm8)); \ - } \ - \ - simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(simde_mm512_cmp_ps_mask_r_))); \ - })) -#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(256) - #define simde_mm512_cmp_ps_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512_private \ - simde_mm512_cmp_ps_mask_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ - simde_mm512_cmp_ps_mask_a_ = simde__m512_to_private((a)), \ - simde_mm512_cmp_ps_mask_b_ = simde__m512_to_private((b)); \ - \ - for (size_t i = 0 ; i < (sizeof(simde_mm512_cmp_ps_mask_r_.m256) / sizeof(simde_mm512_cmp_ps_mask_r_.m256[0])) ; i++) { \ - simde_mm512_cmp_ps_mask_r_.m256[i] = simde_mm256_cmp_ps(simde_mm512_cmp_ps_mask_a_.m256[i], simde_mm512_cmp_ps_mask_b_.m256[i], (imm8)); \ - } \ - \ - simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(simde_mm512_cmp_ps_mask_r_))); \ - })) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_ps_mask - #define _mm512_cmp_ps_mask(a, b, imm8) simde_mm512_cmp_ps_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_cmp_ps_mask(a, b, imm8) _mm256_cmp_ps_mask((a), (b), (imm8)) -#else - #define simde_mm256_cmp_ps_mask(a, b, imm8) simde_mm256_movepi32_mask(simde_mm256_castps_si256(simde_mm256_cmp_ps((a), (b), (imm8)))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_ps_mask - #define _mm256_cmp_ps_mask(a, b, imm8) simde_mm256_cmp_ps_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_cmp_ps_mask(a, b, imm8) _mm_cmp_ps_mask((a), (b), (imm8)) -#else - #define simde_mm_cmp_ps_mask(a, b, imm8) simde_mm_movepi32_mask(simde_mm_castps_si128(simde_mm_cmp_ps((a), (b), (imm8)))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_ps_mask - #define _mm_cmp_ps_mask(a, b, imm8) simde_mm_cmp_ps_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmp_pd_mask (simde__m512d a, simde__m512d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? 
~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m512d_to_private(simde_mm512_setzero_pd()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT64_C(0) : INT64_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m512d_to_private(simde_x_mm512_setone_pd()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(r_))); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_cmp_pd_mask(a, b, imm8) _mm512_cmp_pd_mask((a), (b), (imm8)) -#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm512_cmp_pd_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d_private \ - simde_mm512_cmp_pd_mask_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ - simde_mm512_cmp_pd_mask_a_ = simde__m512d_to_private((a)), \ - simde_mm512_cmp_pd_mask_b_ = simde__m512d_to_private((b)); \ - \ - for (size_t simde_mm512_cmp_pd_mask_i = 0 ; simde_mm512_cmp_pd_mask_i < (sizeof(simde_mm512_cmp_pd_mask_r_.m128d) / sizeof(simde_mm512_cmp_pd_mask_r_.m128d[0])) ; simde_mm512_cmp_pd_mask_i++) { \ - simde_mm512_cmp_pd_mask_r_.m128d[simde_mm512_cmp_pd_mask_i] = simde_mm_cmp_pd(simde_mm512_cmp_pd_mask_a_.m128d[simde_mm512_cmp_pd_mask_i], simde_mm512_cmp_pd_mask_b_.m128d[simde_mm512_cmp_pd_mask_i], (imm8)); \ - } \ - \ - simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(simde_mm512_cmp_pd_mask_r_))); \ - })) -#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(256) - #define simde_mm512_cmp_pd_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m512d_private \ - simde_mm512_cmp_pd_mask_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ - simde_mm512_cmp_pd_mask_a_ = simde__m512d_to_private((a)), \ - simde_mm512_cmp_pd_mask_b_ = simde__m512d_to_private((b)); \ - \ - for (size_t simde_mm512_cmp_pd_mask_i = 0 ; simde_mm512_cmp_pd_mask_i < (sizeof(simde_mm512_cmp_pd_mask_r_.m256d) / sizeof(simde_mm512_cmp_pd_mask_r_.m256d[0])) ; simde_mm512_cmp_pd_mask_i++) { \ - simde_mm512_cmp_pd_mask_r_.m256d[simde_mm512_cmp_pd_mask_i] = simde_mm256_cmp_pd(simde_mm512_cmp_pd_mask_a_.m256d[simde_mm512_cmp_pd_mask_i], simde_mm512_cmp_pd_mask_b_.m256d[simde_mm512_cmp_pd_mask_i], (imm8)); \ - } \ - \ - simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(simde_mm512_cmp_pd_mask_r_))); \ - })) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_pd_mask - #define _mm512_cmp_pd_mask(a, b, imm8) simde_mm512_cmp_pd_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_cmp_pd_mask(a, b, imm8) _mm256_cmp_pd_mask((a), (b), (imm8)) -#else - #define simde_mm256_cmp_pd_mask(a, b, imm8) simde_mm256_movepi64_mask(simde_mm256_castpd_si256(simde_mm256_cmp_pd((a), (b), (imm8)))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_pd_mask - #define _mm256_cmp_pd_mask(a, b, imm8) simde_mm256_cmp_pd_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm_cmp_pd_mask(a, b, imm8) _mm_cmp_pd_mask((a), (b), (imm8)) -#else - #define simde_mm_cmp_pd_mask(a, b, imm8) simde_mm_movepi64_mask(simde_mm_castpd_si128(simde_mm_cmp_pd((a), (b), (imm8)))) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_pd_mask - #define _mm_cmp_pd_mask(a, b, imm8) simde_mm_cmp_pd_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES 
-simde__mmask32 -simde_mm512_cmp_ph_mask (simde__m512h a, simde__m512h b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m512h_private - r_, - a_ = simde__m512h_to_private(a), - b_ = simde__m512h_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 == b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - simde_float16_as_uint16(a_.f16[i]) == simde_float16_as_uint16(b_.f16[i]) - && !simde_isnanhf(a_.f16[i]) && !simde_isnanhf(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 < b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = (simde_float16_to_float32(a_.f16[i]) < simde_float16_to_float32(b_.f16[i])) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 <= b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = (simde_float16_to_float32(a_.f16[i]) <= simde_float16_to_float32(b_.f16[i])) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 != a_.f16) | (b_.f16 != b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - (simde_float16_to_float32(a_.f16[i]) != simde_float16_to_float32(a_.f16[i])) - || (simde_float16_to_float32(b_.f16[i]) != simde_float16_to_float32(b_.f16[i])) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 != b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - (simde_float16_as_uint16(a_.f16[i]) != simde_float16_as_uint16(b_.f16[i])) - || simde_isnanhf(a_.f16[i]) || simde_isnanhf(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 == a_.f16) & (b_.f16 == b_.f16) & (a_.f16 != b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - !(simde_isnanhf(a_.f16[i]) || simde_isnanhf(b_.f16[i])) - && (simde_float16_as_uint16(a_.f16[i]) != simde_float16_as_uint16(b_.f16[i])) - ) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ~(a_.f16 < b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = !( - simde_float16_to_float32(a_.f16[i]) < simde_float16_to_float32(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ~(a_.f16 <= b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = !( - simde_float16_to_float32(a_.f16[i]) <= simde_float16_to_float32(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ((a_.f16 == a_.f16) & (b_.f16 == b_.f16))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = (simde_isnanhf(a_.f16[i]) || simde_isnanhf(b_.f16[i])) ? INT16_C(0) : ~INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 != a_.f16) | (b_.f16 != b_.f16) | (a_.f16 == b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - (simde_isnanhf(a_.f16[i]) || simde_isnanhf(b_.f16[i])) - || (simde_float16_as_uint16(a_.f16[i]) == simde_float16_as_uint16(b_.f16[i])) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ~(a_.f16 >= b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = !( - simde_float16_to_float32(a_.f16[i]) >= simde_float16_to_float32(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ~(a_.f16 > b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = !( - simde_float16_to_float32(a_.f16[i]) > simde_float16_to_float32(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m512h_to_private(simde_mm512_setzero_ph()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 >= b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - simde_float16_to_float32(a_.f16[i]) >= simde_float16_to_float32(b_.f16[i]) - ) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_FLOAT16_VECTOR) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.f16 > b_.f16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f16) / sizeof(r_.f16[0])) ; i++) { - r_.i16[i] = ( - simde_float16_to_float32(a_.f16[i]) > simde_float16_to_float32(b_.f16[i]) - ) ? ~INT16_C(0) : INT16_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m512h_to_private(simde_x_mm512_setone_ph()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi16_mask(simde_mm512_castph_si512(simde__m512h_from_private(r_))); -} -#if defined(SIMDE_X86_AVX512FP16_NATIVE) - #define simde_mm512_cmp_ph_mask(a, b, imm8) _mm512_cmp_ph_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_ph_mask - #define _mm512_cmp_ph_mask(a, b, imm8) simde_mm512_cmp_ph_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_cmp_epi16_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 == b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 <= b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] <= b_.i16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 != b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] != b_.i16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ~(a_.i16 < b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = !(a_.i16[i] < b_.i16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), ~(a_.i16 <= b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = !(a_.i16[i] <= b_.i16[i]) ? 
~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi16_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_cmp_epi16_mask(a, b, imm8) _mm512_cmp_epi16_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epi16_mask - #define _mm512_cmp_epi16_mask(a, b, imm8) simde_mm512_cmp_epi16_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_mask_cmp_epi16_mask(k1, a, b, imm8) _mm512_mask_cmp_epi16_mask(k1, a, b, imm8) -#else - #define simde_mm512_mask_cmp_epi16_mask(k1, a, b, imm8) (k1) & simde_mm512_cmp_epi16_mask(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmp_epi16_mask -#define _mm512_mask_cmp_epi16_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epi16_mask((k1), (a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmp_epi32_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 == b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 <= b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 != b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] != b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = !(a_.i32[i] < b_.i32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.i32 <= b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = !(a_.i32[i] <= b_.i32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_cmp_epi32_mask(a, b, imm8) _mm512_cmp_epi32_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epi32_mask - #define _mm512_cmp_epi32_mask(a, b, imm8) simde_mm512_cmp_epi32_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmp_epi64_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.i64 == b_.i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.i64 < b_.i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] < b_.i64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.i64 <= b_.i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] <= b_.i64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.i64 != b_.i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] != b_.i64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.i64 < b_.i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = !(a_.i64[i] < b_.i64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.i64 <= b_.i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = !(a_.i64[i] <= b_.i64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_cmp_epi64_mask(a, b, imm8) _mm512_cmp_epi64_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epi64_mask - #define _mm512_cmp_epi64_mask(a, b, imm8) simde_mm512_cmp_epi64_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_cmp_epu16_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 == b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] == b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 < b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 <= b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 != b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] != b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), ~(a_.u16 < b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = !(a_.u16[i] < b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), ~(a_.u16 <= b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = !(a_.u16[i] <= b_.u16[i]) ? 
~UINT16_C(0) : UINT16_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi16_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_cmp_epu16_mask(a, b, imm8) _mm512_cmp_epu16_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epu16_mask - #define _mm512_cmp_epu16_mask(a, b, imm8) simde_mm512_cmp_epu16_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_mask_cmp_epu16_mask(k1, a, b, imm8) _mm512_mask_cmp_epu16_mask(k1, a, b, imm8) -#else - #define simde_mm512_mask_cmp_epu16_mask(k1, a, b, imm8) (k1) & simde_mm512_cmp_epu16_mask(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmp_epu16_mask -#define _mm512_mask_cmp_epu16_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epu16_mask((k1), (a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm256_cmp_epu32_mask (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 == b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] == b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 < b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 <= b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 != b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] != b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), ~(a_.u32 < b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = !(a_.u32[i] < b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), ~(a_.u32 <= b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = !(a_.u32[i] <= b_.u32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m256i_to_private(simde_x_mm256_setone_si256()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm256_movepi32_mask(simde__m256i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_cmp_epu32_mask(a, b, imm8) _mm256_cmp_epu32_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_epu32_mask - #define _mm256_cmp_epu32_mask(a, b, imm8) simde_mm256_cmp_epu32_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - #define simde_mm256_mask_cmp_epu32_mask(k1, a, b, imm8) _mm256_mask_cmp_epu32_mask(k1, a, b, imm8) -#else - #define simde_mm256_mask_cmp_epu32_mask(k1, a, b, imm8) (k1) & simde_mm256_cmp_epu32_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_cmp_epu32_mask -#define _mm256_mask_cmp_epu32_mask(a, b, imm8) simde_mm256_mask_cmp_epu32_mask((a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmp_epu32_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 == b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] == b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 < b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 <= b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (a_.u32 != b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] != b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), ~(a_.u32 < b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = !(a_.u32[i] < b_.u32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), ~(a_.u32 <= b_.u32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = !(a_.u32[i] <= b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_cmp_epu32_mask(a, b, imm8) _mm512_cmp_epu32_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epu32_mask - #define _mm512_cmp_epu32_mask(a, b, imm8) simde_mm512_cmp_epu32_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_cmp_epu32_mask(k1, a, b, imm8) _mm512_mask_cmp_epu32_mask(k1, a, b, imm8) -#else - #define simde_mm512_mask_cmp_epu32_mask(k1, a, b, imm8) (k1) & simde_mm512_cmp_epu32_mask(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmp_epu32_mask -#define _mm512_mask_cmp_epu32_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epu32_mask((k1), (a), (b), (imm8)) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmp_epu64_mask (simde__m512i a, simde__m512i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - switch (imm8) { - case SIMDE_MM_CMPINT_EQ: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (a_.u64 == b_.u64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (a_.u64 < b_.u64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] < b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_LE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (a_.u64 <= b_.u64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] <= b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_FALSE: - r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); - break; - - - case SIMDE_MM_CMPINT_NE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (a_.u64 != b_.u64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] != b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLT: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), ~(a_.u64 < b_.u64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = !(a_.u64[i] < b_.u64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_NLE: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), ~(a_.u64 <= b_.u64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = !(a_.u64[i] <= b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - break; - - case SIMDE_MM_CMPINT_TRUE: - r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_cmp_epu64_mask(a, b, imm8) _mm512_cmp_epu64_mask((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_epu64_mask - #define _mm512_cmp_epu64_mask(a, b, imm8) simde_mm512_cmp_epu64_mask((a), (b), (imm8)) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_cmp_epu64_mask(k1, a, b, imm8) _mm512_mask_cmp_epu64_mask(k1, a, b, imm8) -#else - #define simde_mm512_mask_cmp_epu64_mask(k1, a, b, imm8) (k1) & simde_mm512_cmp_epu64_mask(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmp_epu64_mask -#define _mm512_mask_cmp_epu64_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epu64_mask((k1), (a), (b), (imm8)) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_CMP_H) */ -/* :: End simde/x86/avx512/cmp.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/copysign.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_COPYSIGN_H) -#define SIMDE_X86_AVX512_COPYSIGN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/and.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_AND_H) -#define SIMDE_X86_AVX512_AND_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_and_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_and_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_X86_AVX_NATIVE) - r_.m256d[0] = simde_mm256_and_pd(a_.m256d[0], b_.m256d[0]); - r_.m256d[1] = simde_mm256_and_pd(a_.m256d[1], b_.m256d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_pd - #define _mm512_and_pd(a, b) simde_mm512_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_and_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_and_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_X86_AVX_NATIVE) - r_.m256[0] = simde_mm256_and_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_and_ps(a_.m256[1], b_.m256[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - 
SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_ps - #define _mm512_and_ps(a, b) simde_mm512_and_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_and_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_and_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_and_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_and_ps - #define _mm512_mask_and_ps(src, k, a, b) simde_mm512_mask_and_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_and_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_and_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_and_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_ps - #define _mm512_maskz_and_ps(k, a, b) simde_mm512_maskz_and_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_and_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_and_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_and_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_and_pd - #define _mm512_mask_and_pd(src, k, a, b) simde_mm512_mask_and_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_and_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_and_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_and_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_pd - #define _mm512_maskz_and_pd(k, a, b) simde_mm512_maskz_and_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_and_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_and_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_epi32 - #define _mm512_and_epi32(a, b) simde_mm512_and_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_and_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_and_epi32(src, k, v2, v3); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_and_epi32(v2, v3)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_and_epi32 - #define _mm512_mask_and_epi32(src, k, v2, v3) simde_mm512_mask_and_epi32(src, k, v2, v3) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_and_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if 
defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_and_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_and_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_epi32 - #define _mm512_maskz_and_epi32(k, a, b) simde_mm512_maskz_and_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_and_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_and_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] & b_.i64[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_epi64 - #define _mm512_and_epi64(a, b) simde_mm512_and_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_and_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_and_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_and_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_and_epi64 - #define _mm512_mask_and_epi64(src, k, a, b) simde_mm512_mask_and_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_and_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_and_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_and_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_epi64 - #define _mm512_maskz_and_epi64(k, a, b) simde_mm512_maskz_and_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_and_si512 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_and_si512(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_and_si256(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_and_si256(a_.m256i[1], b_.m256i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_si512 - #define _mm512_and_si512(a, b) simde_mm512_and_si512(a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_AND_H) */ -/* :: End simde/x86/avx512/and.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/andnot.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, 
and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512_ANDNOT_H) -#define SIMDE_X86_AVX512_ANDNOT_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_andnot_ps(a, b) _mm512_andnot_ps(a, b) -#else - #define simde_mm512_andnot_ps(a, b) simde_mm512_castsi512_ps(simde_mm512_andnot_si512(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_andnot_ps - #define _mm512_andnot_ps(a, b) simde_mm512_andnot_ps(a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_mask_andnot_ps(src, k, a, b) _mm512_mask_andnot_ps((src), (k), (a), (b)) -#else - #define simde_mm512_mask_andnot_ps(src, k, a, b) simde_mm512_castsi512_ps(simde_mm512_mask_andnot_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_andnot_ps - #define _mm512_mask_andnot_ps(src, k, a, b) simde_mm512_mask_andnot_ps(src, k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_maskz_andnot_ps(k, a, b) _mm512_maskz_andnot_ps((k), (a), (b)) -#else - #define simde_mm512_maskz_andnot_ps(k, a, b) simde_mm512_castsi512_ps(simde_mm512_maskz_andnot_epi32(k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_andnot_ps - #define _mm512_maskz_andnot_ps(k, a, b) simde_mm512_maskz_andnot_ps(k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_andnot_pd(a, b) _mm512_andnot_pd(a, b) -#else - #define simde_mm512_andnot_pd(a, b) simde_mm512_castsi512_pd(simde_mm512_andnot_si512(simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_andnot_pd - #define _mm512_andnot_pd(a, b) simde_mm512_andnot_pd(a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_mask_andnot_pd(src, k, a, b) _mm512_mask_andnot_pd((src), (k), (a), (b)) -#else - #define simde_mm512_mask_andnot_pd(src, k, a, b) simde_mm512_castsi512_pd(simde_mm512_mask_andnot_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_andnot_pd 
- #define _mm512_mask_andnot_pd(src, k, a, b) simde_mm512_mask_andnot_pd(src, k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_maskz_andnot_pd(k, a, b) _mm512_maskz_andnot_pd((k), (a), (b)) -#else - #define simde_mm512_maskz_andnot_pd(k, a, b) simde_mm512_castsi512_pd(simde_mm512_maskz_andnot_epi64(k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_andnot_pd - #define _mm512_maskz_andnot_pd(k, a, b) simde_mm512_maskz_andnot_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_andnot_si512 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_andnot_si512(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_andnot_si256(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_andnot_si256(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#define simde_mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b) -#define simde_mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b) -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_andnot_si512 - #define _mm512_andnot_si512(a, b) simde_mm512_andnot_si512(a, b) - #undef _mm512_andnot_epi32 - #define _mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b) - #undef _mm512_andnot_epi64 - #define _mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_andnot_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_andnot_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_andnot_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_andnot_epi32 - #define _mm512_mask_andnot_epi32(src, k, a, b) simde_mm512_mask_andnot_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_andnot_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_andnot_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_andnot_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_andnot_epi32 - #define _mm512_maskz_andnot_epi32(k, a, b) simde_mm512_maskz_andnot_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_andnot_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_andnot_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_andnot_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_andnot_epi64 - #define _mm512_mask_andnot_epi64(src, k, a, b) simde_mm512_mask_andnot_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_andnot_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_andnot_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_andnot_epi64(a, 
b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_andnot_epi64 - #define _mm512_maskz_andnot_epi64(k, a, b) simde_mm512_maskz_andnot_epi64(k, a, b) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512_ANDNOT_H) */ -/* :: End simde/x86/avx512/andnot.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/avx512/xor.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512_XOR_H) -#define SIMDE_X86_AVX512_XOR_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_xor_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_xor_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - /* TODO: generate reduced case to give to Intel */ - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && !defined(HEDLEY_INTEL_VERSION) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_xor_ps(a_.m256[i], b_.m256[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_xor_ps - #define _mm512_xor_ps(a, b) simde_mm512_xor_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_xor_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_xor_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_xor_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_xor_ps - 
#define _mm512_mask_xor_ps(src, k, a, b) simde_mm512_mask_xor_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_xor_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_xor_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_xor_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_xor_ps - #define _mm512_maskz_xor_ps(k, a, b) simde_mm512_maskz_xor_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_xor_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_xor_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256d[0] = simde_mm256_xor_pd(a_.m256d[0], b_.m256d[0]); - r_.m256d[1] = simde_mm256_xor_pd(a_.m256d[1], b_.m256d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_xor_pd - #define _mm512_xor_pd(a, b) simde_mm512_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_xor_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_xor_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_xor_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_xor_pd - #define _mm512_mask_xor_pd(src, k, a, b) simde_mm512_mask_xor_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_xor_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_xor_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_xor_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_xor_pd - #define _mm512_maskz_xor_pd(k, a, b) simde_mm512_maskz_xor_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_xor_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_xor_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_xor_si256(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_xor_epi32 - #define _mm512_xor_epi32(a, b) simde_mm512_xor_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_xor_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_xor_epi32(src, k, v2, v3); - #else - return simde_mm512_mask_mov_epi32(src, k, 
[Vendored SIMDe AVX-512 headers removed (auto-generated, marker 589c7d599ae2213823acc4334a3ae8ef8caefe18; MIT license, Copyright 2020 Evan Nemerson, Himanshi Mathur): the remainder of simde/x86/avx512/xor.h (simde_mm512_xor_epi32/epi64/si512 plus their mask/maskz variants), simde/x86/avx512/copysign.h (simde_x_mm512_copysign_ps/pd), the opening of simde/x86/avx512/xorsign.h (a SIMDe extension whose comment notes that the branchy pattern "float sgn = input < 0 ? -1 : 1; ... return res * sgn;" can be replaced by a single XOR of the sign bit; a scalar sketch of that trick follows below), and simde/x86/avx512/set1.h (simde_mm512_set1_epi8/16/32/64, set1_ps/pd/ph, the unsigned simde_x_mm512_set1_epu8/16/32/64 helpers, and their mask/maskz broadcast variants).]
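// Illustrative scalar sketch (assumed helper name, not from SIMDe or this
// package): the xorsign rationale summarized above replaces the branchy
// "sgn = input < 0 ? -1 : 1; return res * sgn;" with a single XOR of the
// sign bit of the other operand.  Assuming IEEE-754 floats:
#include <cassert>
#include <cstdint>
#include <cstring>

static float xorsign(float res, float input) {
    std::uint32_t r, s;
    std::memcpy(&r, &res,   sizeof r);    // read the raw bit patterns
    std::memcpy(&s, &input, sizeof s);
    r ^= (s & 0x80000000u);               // XOR in input's sign bit only
    std::memcpy(&res, &r, sizeof r);
    return res;                           // == res * (input < 0 ? -1 : 1)
}

int main() {
    assert(xorsign(3.0f,  2.0f) ==  3.0f);   // positive input: res unchanged
    assert(xorsign(3.0f, -2.0f) == -3.0f);   // negative input: sign flipped
    return 0;
}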
[Vendored SIMDe AVX-512 headers removed, continued (same auto-generated marker and MIT license headers, Copyright 2020 Evan Nemerson, Himanshi Mathur, Hidayat Khan): simde/x86/avx512/div.h (_mm512_div_ps/pd), fmadd.h (_mm512_fmadd_ps/pd), mul.h (_mm512_mul_ps/pd and the widening _mm512_mul_epi32/epu32), negate.h (simde_x_mm512_negate_ps/pd), or.h (_mm512_or_ps/pd/epi32/epi64/si512), sqrt.h (_mm512_sqrt_ps/pd), and sub.h (_mm512_sub_epi8/16/32/64 and _mm512_sub_ps/pd), each with its mask/maskz variants; simde/simde-complex.h, which selects between GNU __complex__, C99 _Complex (or MSVC _Fcomplex/_Dcomplex), and C++ std::complex depending on language mode so the SVML shims can use creal/cimag/cexp portably; and the start of the SVML trigonometric shims (simde_mm_acos_ps/_pd, simde_mm256_acos_ps), which dispatch to native SVML or Sleef when available and otherwise fall back to scalar simde_math_acosf/acos loops. All of the masked intrinsics above are emulated with the same compute-then-blend pattern; a scalar sketch of it follows below.]
return Sleef_acosf8_u10(a); - #else - return Sleef_acosf8_u35(a); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_acos_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_acosf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_acos_ps - #define _mm256_acos_ps(a) simde_mm256_acos_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_acos_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_acos_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_acosd4_u10(a); - #else - return Sleef_acosd4_u35(a); - #endif - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_acos_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_acos(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_acos_pd - #define _mm256_acos_pd(a) simde_mm256_acos_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_acos_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_acos_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_acosf16_u10(a); - #else - return Sleef_acosf16_u35(a); - #endif - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_acos_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_acosf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_acos_ps - #define _mm512_acos_ps(a) simde_mm512_acos_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_acos_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_acos_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_acosd8_u10(a); - #else - return Sleef_acosd8_u35(a); - #endif - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_acos_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_acos(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_acos_pd - #define _mm512_acos_pd(a) simde_mm512_acos_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m512 -simde_mm512_mask_acos_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_acos_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_acos_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_acos_ps - #define _mm512_mask_acos_ps(src, k, a) simde_mm512_mask_acos_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_acos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_acos_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_acos_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_acos_pd - #define _mm512_mask_acos_pd(src, k, a) simde_mm512_mask_acos_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_acosh_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_acosh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_acoshf4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_acoshf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_acosh_ps - #define _mm_acosh_ps(a) simde_mm_acosh_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_acosh_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_acosh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_acoshd2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_acosh(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_acosh_pd - #define _mm_acosh_pd(a) simde_mm_acosh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_acosh_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_acosh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_acoshf8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_acosh_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_acoshf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_acosh_ps - #define _mm256_acosh_ps(a) simde_mm256_acosh_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_acosh_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_acosh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_acoshd4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_acosh_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_acosh(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_acosh_pd - #define _mm256_acosh_pd(a) simde_mm256_acosh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_acosh_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_acosh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_acoshf16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_acosh_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_acoshf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_acosh_ps - #define _mm512_acosh_ps(a) simde_mm512_acosh_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_acosh_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_acosh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_acoshd8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_acosh_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_acosh(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_acosh_pd - #define _mm512_acosh_pd(a) simde_mm512_acosh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_acosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_acosh_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_acosh_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_acosh_ps - #define _mm512_mask_acosh_ps(src, k, a) simde_mm512_mask_acosh_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_acosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_acosh_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_acosh_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_acosh_pd - #define _mm512_mask_acosh_pd(src, k, a) simde_mm512_mask_acosh_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_asin_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_asin_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return 
Sleef_asinf4_u10(a); - #else - return Sleef_asinf4_u35(a); - #endif - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_asinf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_asin_ps - #define _mm_asin_ps(a) simde_mm_asin_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_asin_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_asin_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_asind2_u10(a); - #else - return Sleef_asind2_u35(a); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_asin(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_asin_pd - #define _mm_asin_pd(a) simde_mm_asin_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_asin_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_asin_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_asinf8_u10(a); - #else - return Sleef_asinf8_u35(a); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_asin_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_asinf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_asin_ps - #define _mm256_asin_ps(a) simde_mm256_asin_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_asin_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_asin_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_asind4_u10(a); - #else - return Sleef_asind4_u35(a); - #endif - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_asin_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_asin(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_asin_pd - #define _mm256_asin_pd(a) simde_mm256_asin_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_asin_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_asin_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_asinf16_u10(a); - #else - return Sleef_asinf16_u35(a); - #endif - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_asin_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_asinf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_asin_ps - #define _mm512_asin_ps(a) simde_mm512_asin_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_asin_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_asin_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_asind8_u10(a); - #else - return Sleef_asind8_u35(a); - #endif - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_asin_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_asin(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_asin_pd - #define _mm512_asin_pd(a) simde_mm512_asin_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_asin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_asin_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_asin_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_asin_ps - #define _mm512_mask_asin_ps(src, k, a) simde_mm512_mask_asin_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_asin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_asin_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_asin_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_asin_pd - #define _mm512_mask_asin_pd(src, k, a) simde_mm512_mask_asin_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_asinh_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_asinh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_asinhf4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_asinhf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_asinh_ps - #define _mm_asinh_ps(a) simde_mm_asinh_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_asinh_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_asinh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_asinhd2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - 
r_.f64[i] = simde_math_asinh(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_asinh_pd - #define _mm_asinh_pd(a) simde_mm_asinh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_asinh_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_asinh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_asinhf8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_asinh_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_asinhf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_asinh_ps - #define _mm256_asinh_ps(a) simde_mm256_asinh_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_asinh_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_asinh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_asinhd4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_asinh_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_asinh(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_asinh_pd - #define _mm256_asinh_pd(a) simde_mm256_asinh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_asinh_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_asinh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_asinhf16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_asinh_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_asinhf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_asinh_ps - #define _mm512_asinh_ps(a) simde_mm512_asinh_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_asinh_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_asinh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_asinhd8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_asinh_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_asinh(a_.f64[i]); - } - #endif - - 
return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_asinh_pd - #define _mm512_asinh_pd(a) simde_mm512_asinh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_asinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_asinh_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_asinh_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_asinh_ps - #define _mm512_mask_asinh_ps(src, k, a) simde_mm512_mask_asinh_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_asinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_asinh_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_asinh_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_asinh_pd - #define _mm512_mask_asinh_pd(src, k, a) simde_mm512_mask_asinh_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_atan_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_atan_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_atanf4_u10(a); - #else - return Sleef_atanf4_u35(a); - #endif - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_atanf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_atan_ps - #define _mm_atan_ps(a) simde_mm_atan_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_atan_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_atan_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_atand2_u10(a); - #else - return Sleef_atand2_u35(a); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_atan(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_atan_pd - #define _mm_atan_pd(a) simde_mm_atan_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_atan_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_atan_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_atanf8_u10(a); - #else - return Sleef_atanf8_u35(a); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_atan_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_atanf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_atan_ps 
- #define _mm256_atan_ps(a) simde_mm256_atan_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_atan_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_atan_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_atand4_u10(a); - #else - return Sleef_atand4_u35(a); - #endif - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_atan_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_atan(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_atan_pd - #define _mm256_atan_pd(a) simde_mm256_atan_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_atan_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_atan_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_atanf16_u10(a); - #else - return Sleef_atanf16_u35(a); - #endif - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_atan_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_atanf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_atan_ps - #define _mm512_atan_ps(a) simde_mm512_atan_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_atan_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_atan_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_atand8_u10(a); - #else - return Sleef_atand8_u35(a); - #endif - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_atan_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_atan(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_atan_pd - #define _mm512_atan_pd(a) simde_mm512_atan_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_atan_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_atan_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_atan_ps - #define _mm512_mask_atan_ps(src, k, a) simde_mm512_mask_atan_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_atan_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_atan_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_atan_pd - #define _mm512_mask_atan_pd(src, k, a) simde_mm512_mask_atan_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_atan2_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_atan2_ps(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_atan2f4_u10(a, b); - #else - return Sleef_atan2f4_u35(a, b); - #endif - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_atan2_ps - #define _mm_atan2_ps(a, b) simde_mm_atan2_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_atan2_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_atan2_pd(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_atan2d2_u10(a, b); - #else - return Sleef_atan2d2_u35(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_atan2_pd - #define _mm_atan2_pd(a, b) simde_mm_atan2_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_atan2_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_atan2_ps(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_atan2f8_u10(a, b); - #else - return Sleef_atan2f8_u35(a, b); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_atan2_ps(a_.m128[i], b_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_atan2_ps - #define _mm256_atan2_ps(a, b) simde_mm256_atan2_ps(a, b) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_atan2_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_atan2_pd(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_atan2d4_u10(a, b); - #else - return Sleef_atan2d4_u35(a, b); - #endif - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = 
simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_atan2_pd(a_.m128d[i], b_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_atan2_pd - #define _mm256_atan2_pd(a, b) simde_mm256_atan2_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_atan2_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_atan2_ps(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_atan2f16_u10(a, b); - #else - return Sleef_atan2f16_u35(a, b); - #endif - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_atan2_ps(a_.m256[i], b_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_atan2_ps - #define _mm512_atan2_ps(a, b) simde_mm512_atan2_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_atan2_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_atan2_pd(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_atan2d8_u10(a, b); - #else - return Sleef_atan2d8_u35(a, b); - #endif - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_atan2_pd(a_.m256d[i], b_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_atan2_pd - #define _mm512_atan2_pd(a, b) simde_mm512_atan2_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_atan2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_atan2_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan2_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_atan2_ps - #define _mm512_mask_atan2_ps(src, k, a, b) simde_mm512_mask_atan2_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_atan2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_atan2_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan2_pd(a, b)); - #endif -} 
-#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_atan2_pd - #define _mm512_mask_atan2_pd(src, k, a, b) simde_mm512_mask_atan2_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_atanh_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_atanh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_atanhf4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_atanhf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_atanh_ps - #define _mm_atanh_ps(a) simde_mm_atanh_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_atanh_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_atanh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_atanhd2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_atanh(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_atanh_pd - #define _mm_atanh_pd(a) simde_mm_atanh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_atanh_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_atanh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_atanhf8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_atanh_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_atanhf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_atanh_ps - #define _mm256_atanh_ps(a) simde_mm256_atanh_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_atanh_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_atanh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_atanhd4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_atanh_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_atanh(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_atanh_pd - #define _mm256_atanh_pd(a) simde_mm256_atanh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_atanh_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_atanh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return 
Sleef_atanhf16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_atanh_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_atanhf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_atanh_ps - #define _mm512_atanh_ps(a) simde_mm512_atanh_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_atanh_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_atanh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_atanhd8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_atanh_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_atanh(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_atanh_pd - #define _mm512_atanh_pd(a) simde_mm512_atanh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_atanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_atanh_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_atanh_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_atanh_ps - #define _mm512_mask_atanh_ps(src, k, a) simde_mm512_mask_atanh_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_atanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_atanh_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_atanh_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_atanh_pd - #define _mm512_mask_atanh_pd(src, k, a) simde_mm512_mask_atanh_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cbrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cbrt_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_cbrtf4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cbrtf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cbrt_ps - #define _mm_cbrt_ps(a) simde_mm_cbrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cbrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cbrt_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_cbrtd2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < 
(sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cbrt(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cbrt_pd - #define _mm_cbrt_pd(a) simde_mm_cbrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_cbrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cbrt_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_cbrtf8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_cbrt_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cbrtf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cbrt_ps - #define _mm256_cbrt_ps(a) simde_mm256_cbrt_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_cbrt_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cbrt_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_cbrtd4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_cbrt_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cbrt(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cbrt_pd - #define _mm256_cbrt_pd(a) simde_mm256_cbrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_cbrt_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cbrt_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_cbrtf16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_cbrt_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cbrtf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cbrt_ps - #define _mm512_cbrt_ps(a) simde_mm512_cbrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_cbrt_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cbrt_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_cbrtd8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_cbrt_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cbrt(a_.f64[i]); - } - 
[Removal of the vendored SIMDE SVML emulation layer continues through this span. The deleted block covers the portable wrappers for cbrt, cexp, cos, cosd, cosh, the internal deg2rad helpers, signed and unsigned integer division (div_epi8/16/32/64 and div_epu8/16/32/64, plus the idiv_epi32/udiv_epi32 aliases), erf, erfc, exp, and expm1, each defined for the __m128, __m256, and __m512 widths together with the masked _mm512 variants. Every wrapper follows the same dispatch: call the native SVML intrinsic when SIMDE_X86_SVML_NATIVE and the matching ISA are defined, otherwise fall back to the corresponding Sleef routine under SIMDE_MATH_SLEEF_ENABLE, otherwise split into half-width calls or run a scalar SIMDE_VECTORIZE loop over the lanes (with vector-extension and WASM branches for the integer divisions); each definition then re-aliases the original _mm*/_mm256_*/_mm512_* name when SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES is set, and the masked forms simply blend the unmasked result through mask_mov. A minimal sketch of that fallback shape follows.]
- #define _mm256_expm1_pd(a) simde_mm256_expm1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_expm1_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_expm1_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_expm1f16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_expm1_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_expm1f(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_expm1_ps - #define _mm512_expm1_ps(a) simde_mm512_expm1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_expm1_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_expm1_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_expm1d8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_expm1_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_expm1(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_expm1_pd - #define _mm512_expm1_pd(a) simde_mm512_expm1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_expm1_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_expm1_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_expm1_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_expm1_ps - #define _mm512_mask_expm1_ps(src, k, a) simde_mm512_mask_expm1_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_expm1_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_expm1_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_expm1_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_expm1_pd - #define _mm512_mask_expm1_pd(src, k, a) simde_mm512_mask_expm1_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_exp2_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_exp2_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_exp2f4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_exp2f(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_exp2_ps - #define _mm_exp2_ps(a) simde_mm_exp2_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_exp2_pd (simde__m128d a) 
{ - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_exp2_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_exp2d2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp2(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_exp2_pd - #define _mm_exp2_pd(a) simde_mm_exp2_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_exp2_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_exp2_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_exp2f8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_exp2_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_exp2f(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_exp2_ps - #define _mm256_exp2_ps(a) simde_mm256_exp2_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_exp2_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_exp2_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_exp2d4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_exp2_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp2(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_exp2_pd - #define _mm256_exp2_pd(a) simde_mm256_exp2_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_exp2_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_exp2_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_exp2f16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_exp2_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_exp2f(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_exp2_ps - #define _mm512_exp2_ps(a) simde_mm512_exp2_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_exp2_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_exp2_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_exp2d8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); 
- - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_exp2_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp2(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_exp2_pd - #define _mm512_exp2_pd(a) simde_mm512_exp2_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_exp2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_exp2_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp2_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_exp2_ps - #define _mm512_mask_exp2_ps(src, k, a) simde_mm512_mask_exp2_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_exp2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_exp2_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp2_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_exp2_pd - #define _mm512_mask_exp2_pd(src, k, a) simde_mm512_mask_exp2_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_exp10_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_exp10_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_exp10f4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_exp10f(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_exp10_ps - #define _mm_exp10_ps(a) simde_mm_exp10_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_exp10_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_exp10_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_exp10d2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp10(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_exp10_pd - #define _mm_exp10_pd(a) simde_mm_exp10_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_exp10_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_exp10_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_exp10f8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_exp10_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_exp10f(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - 
#endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_exp10_ps - #define _mm256_exp10_ps(a) simde_mm256_exp10_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_exp10_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_exp10_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_exp10d4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_exp10_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp10(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_exp10_pd - #define _mm256_exp10_pd(a) simde_mm256_exp10_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_exp10_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_exp10_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_exp10f16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_exp10_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_exp10f(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_exp10_ps - #define _mm512_exp10_ps(a) simde_mm512_exp10_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_exp10_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_exp10_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_exp10d8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_exp10_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_exp10(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_exp10_pd - #define _mm512_exp10_pd(a) simde_mm512_exp10_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_exp10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_exp10_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp10_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_exp10_ps - #define _mm512_mask_exp10_ps(src, k, a) simde_mm512_mask_exp10_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_exp10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_exp10_pd(src, k, a); - #else - return 
simde_mm512_mask_mov_pd(src, k, simde_mm512_exp10_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_exp10_pd - #define _mm512_mask_exp10_pd(src, k, a) simde_mm512_mask_exp10_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cdfnorm_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cdfnorm_ps(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - /* https://www.johndcook.com/blog/cpp_phi/ */ - const simde__m128 a1 = simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); - const simde__m128 a2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); - const simde__m128 a3 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.421413741)); - const simde__m128 a4 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); - const simde__m128 a5 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.061405429)); - const simde__m128 p = simde_mm_set1_ps(SIMDE_FLOAT32_C(0.3275911)); - const simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)); - - /* simde_math_fabsf(x) / sqrtf(2.0) */ - const simde__m128 x = simde_mm_div_ps(simde_x_mm_abs_ps(a), simde_mm_sqrt_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)))); - - /* 1.0 / (1.0 + p * x) */ - const simde__m128 t = simde_mm_div_ps(one, simde_mm_add_ps(one, simde_mm_mul_ps(p, x))); - - /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ - simde__m128 y = simde_mm_mul_ps(a5, t); - y = simde_mm_add_ps(y, a4); - y = simde_mm_mul_ps(y, t); - y = simde_mm_add_ps(y, a3); - y = simde_mm_mul_ps(y, t); - y = simde_mm_add_ps(y, a2); - y = simde_mm_mul_ps(y, t); - y = simde_mm_add_ps(y, a1); - y = simde_mm_mul_ps(y, t); - y = simde_mm_mul_ps(y, simde_mm_exp_ps(simde_mm_mul_ps(x, simde_x_mm_negate_ps(x)))); - y = simde_mm_sub_ps(one, y); - - /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ - return simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm_add_ps(one, simde_x_mm_xorsign_ps(y, a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cdfnorm_ps - #define _mm_cdfnorm_ps(a) simde_mm_cdfnorm_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cdfnorm_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cdfnorm_pd(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - /* https://www.johndcook.com/blog/cpp_phi/ */ - const simde__m128d a1 = simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); - const simde__m128d a2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); - const simde__m128d a3 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.421413741)); - const simde__m128d a4 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); - const simde__m128d a5 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.061405429)); - const simde__m128d p = simde_mm_set1_pd(SIMDE_FLOAT64_C(0.6475911)); - const simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)); - - /* simde_math_fabs(x) / sqrt(2.0) */ - const simde__m128d x = simde_mm_div_pd(simde_x_mm_abs_pd(a), simde_mm_sqrt_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)))); - - /* 1.0 / (1.0 + p * x) */ - const simde__m128d t = simde_mm_div_pd(one, simde_mm_add_pd(one, simde_mm_mul_pd(p, x))); - - /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ - simde__m128d y = simde_mm_mul_pd(a5, t); - y = simde_mm_add_pd(y, a4); - y = simde_mm_mul_pd(y, t); - y = simde_mm_add_pd(y, a3); - y = simde_mm_mul_pd(y, t); - y = simde_mm_add_pd(y, a2); - y = simde_mm_mul_pd(y, t); - y = simde_mm_add_pd(y, a1); - y = simde_mm_mul_pd(y, t); - y = simde_mm_mul_pd(y, simde_mm_exp_pd(simde_mm_mul_pd(x, simde_x_mm_negate_pd(x)))); - y = simde_mm_sub_pd(one, y); - - /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ - return simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm_add_pd(one, simde_x_mm_xorsign_pd(y, a))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cdfnorm_pd - #define _mm_cdfnorm_pd(a) simde_mm_cdfnorm_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_cdfnorm_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cdfnorm_ps(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - /* https://www.johndcook.com/blog/cpp_phi/ */ - const simde__m256 a1 = simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); - const simde__m256 a2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); - const simde__m256 a3 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.421413741)); - const simde__m256 a4 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); - const simde__m256 a5 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.061405429)); - const simde__m256 p = simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.3275911)); - const simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)); - - /* simde_math_fabsf(x) / sqrtf(2.0) */ - const simde__m256 x = simde_mm256_div_ps(simde_x_mm256_abs_ps(a), simde_mm256_sqrt_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)))); - - /* 1.0 / (1.0 + p * x) */ - const simde__m256 t = simde_mm256_div_ps(one, simde_mm256_add_ps(one, simde_mm256_mul_ps(p, x))); - - /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ - simde__m256 y = simde_mm256_mul_ps(a5, t); - y = simde_mm256_add_ps(y, a4); - y = simde_mm256_mul_ps(y, t); - y = simde_mm256_add_ps(y, a3); - y = simde_mm256_mul_ps(y, t); - y = simde_mm256_add_ps(y, a2); - y = simde_mm256_mul_ps(y, t); - y = simde_mm256_add_ps(y, a1); - y = simde_mm256_mul_ps(y, t); - y = simde_mm256_mul_ps(y, simde_mm256_exp_ps(simde_mm256_mul_ps(x, simde_x_mm256_negate_ps(x)))); - y = simde_mm256_sub_ps(one, y); - - /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ - return simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm256_add_ps(one, simde_x_mm256_xorsign_ps(y, a))); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_cdfnorm_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cdfnorm_ps - #define _mm256_cdfnorm_ps(a) simde_mm256_cdfnorm_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_cdfnorm_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cdfnorm_pd(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - /* https://www.johndcook.com/blog/cpp_phi/ */ - const simde__m256d a1 = simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); - const simde__m256d a2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); - const simde__m256d a3 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.421413741)); - const simde__m256d a4 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); - const simde__m256d a5 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.061405429)); - const simde__m256d p = simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.6475911)); - const simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)); - - /* simde_math_fabs(x) / sqrt(2.0) */ - const simde__m256d x = simde_mm256_div_pd(simde_x_mm256_abs_pd(a), simde_mm256_sqrt_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)))); - - /* 1.0 / (1.0 + p * x) */ - const simde__m256d t = simde_mm256_div_pd(one, simde_mm256_add_pd(one, simde_mm256_mul_pd(p, x))); - - /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ - simde__m256d y = simde_mm256_mul_pd(a5, t); - y = simde_mm256_add_pd(y, a4); - y = simde_mm256_mul_pd(y, t); - y = simde_mm256_add_pd(y, a3); - y = simde_mm256_mul_pd(y, t); - y = simde_mm256_add_pd(y, a2); - y = simde_mm256_mul_pd(y, t); - y = simde_mm256_add_pd(y, a1); - y = simde_mm256_mul_pd(y, t); - y = simde_mm256_mul_pd(y, simde_mm256_exp_pd(simde_mm256_mul_pd(x, simde_x_mm256_negate_pd(x)))); - y = simde_mm256_sub_pd(one, y); - - /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ - return simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm256_add_pd(one, simde_x_mm256_xorsign_pd(y, a))); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_cdfnorm_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cdfnorm_pd - #define _mm256_cdfnorm_pd(a) simde_mm256_cdfnorm_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_cdfnorm_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cdfnorm_ps(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - /* https://www.johndcook.com/blog/cpp_phi/ */ - const simde__m512 a1 = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); - const simde__m512 a2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); - const simde__m512 a3 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.421413741)); - const simde__m512 a4 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); - const simde__m512 a5 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.061405429)); - const simde__m512 p = simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.3275911)); - const simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)); - - /* simde_math_fabsf(x) / sqrtf(2.0) */ - const simde__m512 x = simde_mm512_div_ps(simde_mm512_abs_ps(a), simde_mm512_sqrt_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)))); - - /* 1.0 / (1.0 + p * x) */ - const simde__m512 t = simde_mm512_div_ps(one, simde_mm512_add_ps(one, simde_mm512_mul_ps(p, x))); - - /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ - simde__m512 y = simde_mm512_mul_ps(a5, t); - y = simde_mm512_add_ps(y, a4); - y = simde_mm512_mul_ps(y, t); - y = simde_mm512_add_ps(y, a3); - y = simde_mm512_mul_ps(y, t); - y = simde_mm512_add_ps(y, a2); - y = simde_mm512_mul_ps(y, t); - y = simde_mm512_add_ps(y, a1); - y = simde_mm512_mul_ps(y, t); - y = simde_mm512_mul_ps(y, simde_mm512_exp_ps(simde_mm512_mul_ps(x, simde_x_mm512_negate_ps(x)))); - y = simde_mm512_sub_ps(one, y); - - /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ - return simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm512_add_ps(one, simde_x_mm512_xorsign_ps(y, a))); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_cdfnorm_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cdfnorm_ps - #define _mm512_cdfnorm_ps(a) simde_mm512_cdfnorm_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_cdfnorm_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cdfnorm_pd(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - /* https://www.johndcook.com/blog/cpp_phi/ */ - const simde__m512d a1 = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); - const simde__m512d a2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); - const simde__m512d a3 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.421413741)); - const simde__m512d a4 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); - const simde__m512d a5 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.061405429)); - const simde__m512d p = simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.6475911)); - const simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)); - - /* simde_math_fabs(x) / sqrt(2.0) */ - const simde__m512d x = simde_mm512_div_pd(simde_mm512_abs_pd(a), simde_mm512_sqrt_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)))); - - /* 1.0 / (1.0 + p * x) */ - const simde__m512d t = simde_mm512_div_pd(one, simde_mm512_add_pd(one, simde_mm512_mul_pd(p, x))); - - /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ - simde__m512d y = simde_mm512_mul_pd(a5, t); - y = simde_mm512_add_pd(y, a4); - y = simde_mm512_mul_pd(y, t); - y = simde_mm512_add_pd(y, a3); - y = simde_mm512_mul_pd(y, t); - y = simde_mm512_add_pd(y, a2); - y = simde_mm512_mul_pd(y, t); - y = simde_mm512_add_pd(y, a1); - y = simde_mm512_mul_pd(y, t); - y = simde_mm512_mul_pd(y, simde_mm512_exp_pd(simde_mm512_mul_pd(x, simde_x_mm512_negate_pd(x)))); - y = simde_mm512_sub_pd(one, y); - - /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ - return simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm512_add_pd(one, simde_x_mm512_xorsign_pd(y, a))); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_cdfnorm_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cdfnorm_pd - #define _mm512_cdfnorm_pd(a) simde_mm512_cdfnorm_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_cdfnorm_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cdfnorm_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorm_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cdfnorm_ps - #define _mm512_mask_cdfnorm_ps(src, k, a) simde_mm512_mask_cdfnorm_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_cdfnorm_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cdfnorm_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorm_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cdfnorm_pd - #define _mm512_mask_cdfnorm_pd(src, k, a) simde_mm512_mask_cdfnorm_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_idivrem_epi32 (simde__m128i* mem_addr, simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) - return _mm_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m128i*, mem_addr), a, b); - #else - simde__m128i r; - - r = simde_mm_div_epi32(a, b); - *mem_addr = simde_mm_sub_epi32(a, simde_mm_mullo_epi32(r, b)); - - return r; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_idivrem_epi32 - #define _mm_idivrem_epi32(mem_addr, a, b) simde_mm_idivrem_epi32((mem_addr),(a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_idivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b); - #else - simde__m256i r; - - r = simde_mm256_div_epi32(a, b); - *mem_addr = simde_mm256_sub_epi32(a, simde_mm256_mullo_epi32(r, b)); - - return r; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_idivrem_epi32 - #define _mm256_idivrem_epi32(mem_addr, a, b) simde_mm256_idivrem_epi32((mem_addr),(a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_hypot_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_hypot_ps(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_hypotf4_u05(a, b); - #else - return Sleef_hypotf4_u35(a, b); - #endif - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; 
i++) { - r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_hypot_ps - #define _mm_hypot_ps(a, b) simde_mm_hypot_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_hypot_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_hypot_pd(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_hypotd2_u05(a, b); - #else - return Sleef_hypotd2_u35(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_hypot_pd - #define _mm_hypot_pd(a, b) simde_mm_hypot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hypot_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hypot_ps(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_hypotf8_u05(a, b); - #else - return Sleef_hypotf8_u35(a, b); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_hypot_ps(a_.m128[i], b_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_hypot_ps - #define _mm256_hypot_ps(a, b) simde_mm256_hypot_ps(a, b) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hypot_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hypot_pd(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_hypotd4_u05(a, b); - #else - return Sleef_hypotd4_u35(a, b); - #endif - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_hypot_pd(a_.m128d[i], b_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_hypot_pd - #define _mm256_hypot_pd(a, b) simde_mm256_hypot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_hypot_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_hypot_ps(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_hypotf16_u05(a, b); - #else - return 
Sleef_hypotf16_u35(a, b); - #endif - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_hypot_ps(a_.m256[i], b_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_hypot_ps - #define _mm512_hypot_ps(a, b) simde_mm512_hypot_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_hypot_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_hypot_pd(a, b); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_hypotd8_u05(a, b); - #else - return Sleef_hypotd8_u35(a, b); - #endif - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_hypot_pd(a_.m256d[i], b_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_hypot_pd - #define _mm512_hypot_pd(a, b) simde_mm512_hypot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_hypot_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_hypot_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_hypot_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_hypot_ps - #define _mm512_mask_hypot_ps(src, k, a, b) simde_mm512_mask_hypot_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_hypot_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_hypot_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_hypot_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_hypot_pd - #define _mm512_mask_hypot_pd(src, k, a, b) simde_mm512_mask_hypot_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_invcbrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_invcbrt_ps(a); - #else - return simde_mm_rcp_ps(simde_mm_cbrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_invcbrt_ps - #define _mm_invcbrt_ps(a) simde_mm_invcbrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_invcbrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_invcbrt_pd(a); - #else - return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_cbrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_invcbrt_pd - #define _mm_invcbrt_pd(a) 
simde_mm_invcbrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_invcbrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_invcbrt_ps(a); - #else - return simde_mm256_rcp_ps(simde_mm256_cbrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_invcbrt_ps - #define _mm256_invcbrt_ps(a) simde_mm256_invcbrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_invcbrt_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_invcbrt_pd(a); - #else - return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_cbrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_invcbrt_pd - #define _mm256_invcbrt_pd(a) simde_mm256_invcbrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_invsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_invsqrt_ps(a); - #else - return simde_mm_rcp_ps(simde_mm_sqrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_invsqrt_ps - #define _mm_invsqrt_ps(a) simde_mm_invsqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_invsqrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_invsqrt_pd(a); - #else - return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_sqrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_invsqrt_pd - #define _mm_invsqrt_pd(a) simde_mm_invsqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_invsqrt_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_invsqrt_ps(a); - #else - return simde_mm256_rcp_ps(simde_mm256_sqrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_invsqrt_ps - #define _mm256_invsqrt_ps(a) simde_mm256_invsqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_invsqrt_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_invsqrt_pd(a); - #else - return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_sqrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_invsqrt_pd - #define _mm256_invsqrt_pd(a) simde_mm256_invsqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_invsqrt_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_invsqrt_ps(a); - #else - return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), simde_mm512_sqrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_invsqrt_ps - #define _mm512_invsqrt_ps(a) simde_mm512_invsqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_invsqrt_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_invsqrt_pd(a); - #else - return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm512_sqrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_invsqrt_pd - #define _mm512_invsqrt_pd(a) simde_mm512_invsqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_invsqrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_invsqrt_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_invsqrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_invsqrt_ps - #define _mm512_mask_invsqrt_ps(src, k, a) simde_mm512_mask_invsqrt_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_invsqrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_invsqrt_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_invsqrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_invsqrt_pd - #define _mm512_mask_invsqrt_pd(src, k, a) simde_mm512_mask_invsqrt_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_log_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_log_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_logf4_u10(a); - #else - return Sleef_logf4_u35(a); - #endif - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_logf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_log_ps - #define _mm_log_ps(a) simde_mm_log_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_log_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_log_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_logd2_u10(a); - #else - return Sleef_logd2_u35(a); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_log(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_log_pd - #define _mm_log_pd(a) simde_mm_log_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_log_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_log_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_logf8_u10(a); - #else - return Sleef_logf8_u35(a); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_log_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_logf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_log_ps - #define _mm256_log_ps(a) simde_mm256_log_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_log_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_log_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - 
#if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_logd4_u10(a); - #else - return Sleef_logd4_u35(a); - #endif - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_log_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_log(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_log_pd - #define _mm256_log_pd(a) simde_mm256_log_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_log_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_log_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_logf16_u10(a); - #else - return Sleef_logf16_u35(a); - #endif - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_log_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_logf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_log_ps - #define _mm512_log_ps(a) simde_mm512_log_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_log_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_log_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_logd8_u10(a); - #else - return Sleef_logd8_u35(a); - #endif - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_log_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_log(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_log_pd - #define _mm512_log_pd(a) simde_mm512_log_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_log_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_log_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_log_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_log_ps - #define _mm512_mask_log_ps(src, k, a) simde_mm512_mask_log_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_log_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_log_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_log_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_log_pd - #define _mm512_mask_log_pd(src, k, a) simde_mm512_mask_log_pd(src, k, a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cdfnorminv_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cdfnorminv_ps(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - simde__m128 matched, retval = simde_mm_setzero_ps(); - - { /* if (a < 0 || a > 1) */ - matched = simde_mm_or_ps(simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))), simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)))); - - /* We don't actually need to do anything here since we initialize - * retval to 0.0. */ - } - - { /* else if (a == 0) */ - simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))); - mask = simde_mm_andnot_ps(matched, mask); - matched = simde_mm_or_ps(matched, mask); - - simde__m128 res = simde_mm_set1_ps(-SIMDE_MATH_INFINITYF); - - retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); - } - - { /* else if (a == 1) */ - simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0))); - mask = simde_mm_andnot_ps(matched, mask); - matched = simde_mm_or_ps(matched, mask); - - simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF); - - retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); - } - - { /* Remaining conditions. - * - * Including the else case in this complicates things a lot, but - * we're using cheap operations to get rid of expensive multiply - * and add functions. This should be a small improvement on SSE - * prior to 4.1. On SSE 4.1 we can use _mm_blendv_ps which is - * very fast and this becomes a huge win. NEON, AltiVec, and - * WASM also have blend operations, so this should be a big win - * there, too. */ - - /* else if (a < 0.02425) */ - simde__m128 mask_lo = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.02425))); - /* else if (a > 0.97575) */ - simde__m128 mask_hi = simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.97575))); - - simde__m128 mask = simde_mm_or_ps(mask_lo, mask_hi); - matched = simde_mm_or_ps(matched, mask); - - /* else */ - simde__m128 mask_el = simde_x_mm_not_ps(matched); - mask = simde_mm_or_ps(mask, mask_el); - - /* r = a - 0.5f */ - simde__m128 r = simde_mm_sub_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5))); - - /* lo: q = a - * hi: q = (1.0 - a) */ - simde__m128 q = simde_mm_and_ps(mask_lo, a); - q = simde_mm_or_ps(q, simde_mm_and_ps(mask_hi, simde_mm_sub_ps(simde_mm_set1_ps(1.0f), a))); - - /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ - q = simde_mm_log_ps(q); - q = simde_mm_mul_ps(q, simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.0))); - q = simde_mm_sqrt_ps(q); - - /* el: q = r * r */ - q = simde_x_mm_select_ps(q, simde_mm_mul_ps(r, r), mask_el); - - /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ - /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ - /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ - simde__m128 numerator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el); - numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el)); - numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), 
simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el)); - numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el)); - numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el)); - numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el)); - { - simde__m128 multiplier; - multiplier = simde_mm_and_ps(mask_lo, simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0))); - multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_hi, simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.0)))); - multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_el, r)); - numerator = simde_mm_mul_ps(numerator, multiplier); - } - - /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ - /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ - simde__m128 denominator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el); - denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el)); - denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el)); - denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el)); - denominator = simde_mm_fmadd_ps(denominator, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el), - simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el)); - denominator = simde_mm_fmadd_ps(denominator, q, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0))); - - /* res = numerator / denominator; */ - simde__m128 res = simde_mm_div_ps(numerator, denominator); - - retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); - } - - return retval; - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cdfnorminv_ps - #define _mm_cdfnorminv_ps(a) simde_mm_cdfnorminv_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cdfnorminv_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_cdfnorminv_pd(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - simde__m128d matched, retval = simde_mm_setzero_pd(); - - { /* if (a < 0 || a > 1) */ - matched = simde_mm_or_pd(simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))), simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)))); - - /* We don't actually need to do anything here since we initialize - * retval to 0.0. 
*/ - } - - { /* else if (a == 0) */ - simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))); - mask = simde_mm_andnot_pd(matched, mask); - matched = simde_mm_or_pd(matched, mask); - - simde__m128d res = simde_mm_set1_pd(-SIMDE_MATH_INFINITY); - - retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); - } - - { /* else if (a == 1) */ - simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0))); - mask = simde_mm_andnot_pd(matched, mask); - matched = simde_mm_or_pd(matched, mask); - - simde__m128d res = simde_mm_set1_pd(SIMDE_MATH_INFINITY); - - retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); - } - - { /* Remaining conditions. - * - * Including the else case in this complicates things a lot, but - * we're using cheap operations to get rid of expensive multiply - * and add functions. This should be a small improvement on SSE - * prior to 4.1. On SSE 4.1 we can use _mm_blendv_pd which is - * very fast and this becomes a huge win. NEON, AltiVec, and - * WASM also have blend operations, so this should be a big win - * there, too. */ - - /* else if (a < 0.02425) */ - simde__m128d mask_lo = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.02425))); - /* else if (a > 0.97575) */ - simde__m128d mask_hi = simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.97575))); - - simde__m128d mask = simde_mm_or_pd(mask_lo, mask_hi); - matched = simde_mm_or_pd(matched, mask); - - /* else */ - simde__m128d mask_el = simde_x_mm_not_pd(matched); - mask = simde_mm_or_pd(mask, mask_el); - - /* r = a - 0.5 */ - simde__m128d r = simde_mm_sub_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5))); - - /* lo: q = a - * hi: q = (1.0 - a) */ - simde__m128d q = simde_mm_and_pd(mask_lo, a); - q = simde_mm_or_pd(q, simde_mm_and_pd(mask_hi, simde_mm_sub_pd(simde_mm_set1_pd(1.0), a))); - - /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */ - q = simde_mm_log_pd(q); - q = simde_mm_mul_pd(q, simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.0))); - q = simde_mm_sqrt_pd(q); - - /* el: q = r * r */ - q = simde_x_mm_select_pd(q, simde_mm_mul_pd(r, r), mask_el); - - /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0); */ - /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */ - /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ - simde__m128d numerator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el); - numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el)); - numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el)); - numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el)); - numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el)); - numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 
2.938163982698783e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el)); - { - simde__m128d multiplier; - multiplier = simde_mm_and_pd(mask_lo, simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0))); - multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_hi, simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.0)))); - multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_el, r)); - numerator = simde_mm_mul_pd(numerator, multiplier); - } - - /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ - /* el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ - simde__m128d denominator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el); - denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el)); - denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el)); - denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el)); - denominator = simde_mm_fmadd_pd(denominator, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el), - simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el)); - denominator = simde_mm_fmadd_pd(denominator, q, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0))); - - /* res = numerator / denominator; */ - simde__m128d res = simde_mm_div_pd(numerator, denominator); - - retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); - } - - return retval; - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_cdfnorminv_pd - #define _mm_cdfnorminv_pd(a) simde_mm_cdfnorminv_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_cdfnorminv_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cdfnorminv_ps(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) - simde__m256 matched, retval = simde_mm256_setzero_ps(); - - { /* if (a < 0 || a > 1) */ - matched = simde_mm256_or_ps(simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ)); - - /* We don't actually need to do anything here since we initialize - * retval to 0.0. 
*/ - } - - { /* else if (a == 0) */ - simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); - mask = simde_mm256_andnot_ps(matched, mask); - matched = simde_mm256_or_ps(matched, mask); - - simde__m256 res = simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF); - - retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); - } - - { /* else if (a == 1) */ - simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_EQ_OQ); - mask = simde_mm256_andnot_ps(matched, mask); - matched = simde_mm256_or_ps(matched, mask); - - simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF); - - retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); - } - - { /* Remaining conditions. - * - * Including the else case in this complicates things a lot, but - * we're using cheap operations to get rid of expensive multiply - * and add functions. This should be a small improvement on SSE - * prior to 4.1. On SSE 4.1 we can use _mm256_blendv_ps which is - * very fast and this becomes a huge win. NEON, AltiVec, and - * WASM also have blend operations, so this should be a big win - * there, too. */ - - /* else if (a < 0.02425) */ - simde__m256 mask_lo = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ); - /* else if (a > 0.97575) */ - simde__m256 mask_hi = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ); - - simde__m256 mask = simde_mm256_or_ps(mask_lo, mask_hi); - matched = simde_mm256_or_ps(matched, mask); - - /* else */ - simde__m256 mask_el = simde_x_mm256_not_ps(matched); - mask = simde_mm256_or_ps(mask, mask_el); - - /* r = a - 0.5f */ - simde__m256 r = simde_mm256_sub_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5))); - - /* lo: q = a - * hi: q = (1.0 - a) */ - simde__m256 q = simde_mm256_and_ps(mask_lo, a); - q = simde_mm256_or_ps(q, simde_mm256_and_ps(mask_hi, simde_mm256_sub_ps(simde_mm256_set1_ps(1.0f), a))); - - /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ - q = simde_mm256_log_ps(q); - q = simde_mm256_mul_ps(q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.0))); - q = simde_mm256_sqrt_ps(q); - - /* el: q = r * r */ - q = simde_x_mm256_select_ps(q, simde_mm256_mul_ps(r, r), mask_el); - - /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ - /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ - /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ - simde__m256 numerator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el); - numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el)); - numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el)); - numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el)); - numerator = simde_mm256_fmadd_ps(numerator, q, 
simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el)); - numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el)); - { - simde__m256 multiplier; - multiplier = simde_mm256_and_ps(mask_lo, simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0))); - multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_hi, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.0)))); - multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_el, r)); - numerator = simde_mm256_mul_ps(numerator, multiplier); - } - - /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ - /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ - simde__m256 denominator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el); - denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el)); - denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el)); - denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el)); - denominator = simde_mm256_fmadd_ps(denominator, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el), - simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el)); - denominator = simde_mm256_fmadd_ps(denominator, q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0))); - - /* res = numerator / denominator; */ - simde__m256 res = simde_mm256_div_ps(numerator, denominator); - - retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); - } - - return retval; - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_cdfnorminv_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cdfnorminv_ps - #define _mm256_cdfnorminv_ps(a) simde_mm256_cdfnorminv_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_cdfnorminv_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cdfnorminv_pd(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) - simde__m256d matched, retval = simde_mm256_setzero_pd(); - - { /* if (a < 0 || a > 1) */ - matched = simde_mm256_or_pd(simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ)); - - /* We don't actually need to do anything here since we 
initialize - * retval to 0.0. */ - } - - { /* else if (a == 0) */ - simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); - mask = simde_mm256_andnot_pd(matched, mask); - matched = simde_mm256_or_pd(matched, mask); - - simde__m256d res = simde_mm256_set1_pd(-SIMDE_MATH_INFINITY); - - retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); - } - - { /* else if (a == 1) */ - simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_EQ_OQ); - mask = simde_mm256_andnot_pd(matched, mask); - matched = simde_mm256_or_pd(matched, mask); - - simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY); - - retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); - } - - { /* Remaining conditions. - * - * Including the else case in this complicates things a lot, but - * we're using cheap operations to get rid of expensive multiply - * and add functions. This should be a small improvement on SSE - * prior to 4.1. On SSE 4.1 we can use _mm256_blendv_pd which is - * very fast and this becomes a huge win. NEON, AltiVec, and - * WASM also have blend operations, so this should be a big win - * there, too. */ - - /* else if (a < 0.02425) */ - simde__m256d mask_lo = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ); - /* else if (a > 0.97575) */ - simde__m256d mask_hi = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ); - - simde__m256d mask = simde_mm256_or_pd(mask_lo, mask_hi); - matched = simde_mm256_or_pd(matched, mask); - - /* else */ - simde__m256d mask_el = simde_x_mm256_not_pd(matched); - mask = simde_mm256_or_pd(mask, mask_el); - - /* r = a - 0.5 */ - simde__m256d r = simde_mm256_sub_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5))); - - /* lo: q = a - * hi: q = (1.0 - a) */ - simde__m256d q = simde_mm256_and_pd(mask_lo, a); - q = simde_mm256_or_pd(q, simde_mm256_and_pd(mask_hi, simde_mm256_sub_pd(simde_mm256_set1_pd(1.0), a))); - - /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */ - q = simde_mm256_log_pd(q); - q = simde_mm256_mul_pd(q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.0))); - q = simde_mm256_sqrt_pd(q); - - /* el: q = r * r */ - q = simde_x_mm256_select_pd(q, simde_mm256_mul_pd(r, r), mask_el); - - /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0); */ - /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */ - /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ - simde__m256d numerator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el); - numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el)); - numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el)); - numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el)); - numerator = simde_mm256_fmadd_pd(numerator, q, 
simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el)); - numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el)); - { - simde__m256d multiplier; - multiplier = simde_mm256_and_pd(mask_lo, simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0))); - multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_hi, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.0)))); - multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_el, r)); - numerator = simde_mm256_mul_pd(numerator, multiplier); - } - - /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ - /* el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ - simde__m256d denominator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el); - denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el)); - denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el)); - denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el)); - denominator = simde_mm256_fmadd_pd(denominator, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el), - simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el)); - denominator = simde_mm256_fmadd_pd(denominator, q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0))); - - /* res = numerator / denominator; */ - simde__m256d res = simde_mm256_div_pd(numerator, denominator); - - retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); - } - - return retval; - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_cdfnorminv_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_cdfnorminv_pd - #define _mm256_cdfnorminv_pd(a) simde_mm256_cdfnorminv_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_cdfnorminv_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cdfnorminv_ps(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_cdfnorminv_ps(a_.m256[i]); - } - - return simde__m512_from_private(r_); - #else - - simde__m512 retval = simde_mm512_setzero_ps(); - simde__mmask16 matched; - - { /* if (a < 0 
|| a > 1) */ - matched = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ); - matched |= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ); - - /* We don't actually need to do anything here since we initialize - * retval to 0.0. */ - } - - { /* else if (a == 0) */ - simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); - matched |= mask; - - retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF)); - } - - { /* else if (a == 1) */ - simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); - matched |= mask; - - retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(SIMDE_MATH_INFINITYF)); - } - - { /* else if (a < 0.02425) */ - simde__mmask16 mask_lo = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ); - /* else if (a > 0.97575) */ - simde__mmask16 mask_hi = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ); - - simde__mmask16 mask = mask_lo | mask_hi; - matched = matched | mask; - - /* else */ - simde__mmask16 mask_el = ~matched; - - /* r = a - 0.5f */ - simde__m512 r = simde_mm512_sub_ps(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5))); - - /* lo: q = a - * hi: q = (1.0 - a) */ - simde__m512 q = simde_mm512_maskz_mov_ps(mask_lo, a); - q = simde_mm512_mask_sub_ps(q, mask_hi, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a); - - /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ - q = simde_mm512_log_ps(q); - q = simde_mm512_mul_ps(q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.0))); - q = simde_mm512_sqrt_ps(q); - - /* el: q = r * r */ - q = simde_mm512_mask_mul_ps(q, mask_el, r, r); - - /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ - /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ - /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ - simde__m512 numerator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01))); - numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)))); - numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)))); - numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)))); - numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)))); - numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)))); - { - simde__m512 multiplier; - multiplier = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0)); - multiplier = simde_mm512_mask_mov_ps(multiplier, mask_hi, 
simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.0))); - multiplier = simde_mm512_mask_mov_ps(multiplier, mask_el, r); - numerator = simde_mm512_mul_ps(numerator, multiplier); - } - - /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ - /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ - simde__m512 denominator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01))); - denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)))); - denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)))); - denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)))); - denominator = simde_mm512_fmadd_ps(denominator, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0)), mask_el, q), - simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.0)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)))); - denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0))); - - /* res = numerator / denominator; */ - retval = simde_mm512_mask_div_ps(retval, mask_lo | mask_hi | mask_el, numerator, denominator); - } - - return retval; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cdfnorminv_ps - #define _mm512_cdfnorminv_ps(a) simde_mm512_cdfnorminv_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_cdfnorminv_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cdfnorminv_pd(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_cdfnorminv_pd(a_.m256d[i]); - } - - return simde__m512d_from_private(r_); - #else - - simde__m512d retval = simde_mm512_setzero_pd(); - simde__mmask8 matched; - - { /* if (a < 0 || a > 1) */ - matched = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ); - matched |= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ); - - /* We don't actually need to do anything here since we initialize - * retval to 0.0. 
*/ - } - - { /* else if (a == 0) */ - simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); - matched |= mask; - - retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(-SIMDE_MATH_INFINITY)); - } - - { /* else if (a == 1) */ - simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); - matched |= mask; - - retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(SIMDE_MATH_INFINITY)); - } - - { /* else if (a < 0.02425) */ - simde__mmask8 mask_lo = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ); - /* else if (a > 0.97575) */ - simde__mmask8 mask_hi = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ); - - simde__mmask8 mask = mask_lo | mask_hi; - matched = matched | mask; - - /* else */ - simde__mmask8 mask_el = ~matched; - - /* r = a - 0.5f */ - simde__m512d r = simde_mm512_sub_pd(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5))); - - /* lo: q = a - * hi: q = (1.0 - a) */ - simde__m512d q = a; - q = simde_mm512_mask_sub_pd(q, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a); - - /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ - q = simde_mm512_log_pd(q); - q = simde_mm512_mul_pd(q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.0))); - q = simde_mm512_sqrt_pd(q); - - /* el: q = r * r */ - q = simde_mm512_mask_mul_pd(q, mask_el, r, r); - - /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ - /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ - /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ - simde__m512d numerator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01))); - numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)))); - numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)))); - numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)))); - numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)))); - numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)))); - { - simde__m512d multiplier; - multiplier = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0)); - multiplier = simde_mm512_mask_mov_pd(multiplier, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.0))); - multiplier = simde_mm512_mask_mov_pd(multiplier, mask_el, r); - numerator = simde_mm512_mul_pd(numerator, multiplier); - } - - /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ - /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + 
c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ - simde__m512d denominator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01))); - denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)))); - denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)))); - denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)))); - denominator = simde_mm512_fmadd_pd(denominator, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0)), mask_el, q), - simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.0)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)))); - denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0))); - - /* res = numerator / denominator; */ - retval = simde_mm512_mask_div_pd(retval, mask_lo | mask_hi | mask_el, numerator, denominator); - } - - return retval; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_cdfnorminv_pd - #define _mm512_cdfnorminv_pd(a) simde_mm512_cdfnorminv_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_cdfnorminv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cdfnorminv_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorminv_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cdfnorminv_ps - #define _mm512_mask_cdfnorminv_ps(src, k, a) simde_mm512_mask_cdfnorminv_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_cdfnorminv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cdfnorminv_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorminv_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cdfnorminv_pd - #define _mm512_mask_cdfnorminv_pd(src, k, a) simde_mm512_mask_cdfnorminv_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_erfinv_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_erfinv_ps(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c */ - simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)); - - simde__m128 lnx = simde_mm_log_ps(simde_mm_mul_ps(simde_mm_sub_ps(one, a), simde_mm_add_ps(one, a))); - - simde__m128 tt1 = simde_mm_mul_ps(simde_mm_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.147))); - tt1 = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1); - tt1 = simde_mm_add_ps(tt1, simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx)); - - simde__m128 tt2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147)); - tt2 = simde_mm_mul_ps(tt2, lnx); - - simde__m128 r = simde_mm_mul_ps(tt1, 
tt1); - r = simde_mm_sub_ps(r, tt2); - r = simde_mm_sqrt_ps(r); - r = simde_mm_add_ps(simde_x_mm_negate_ps(tt1), r); - r = simde_mm_sqrt_ps(r); - - return simde_x_mm_xorsign_ps(r, a); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_erfinvf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_erfinv_ps - #define _mm_erfinv_ps(a) simde_mm_erfinv_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_erfinv_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_erfinv_pd(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)); - - simde__m128d lnx = simde_mm_log_pd(simde_mm_mul_pd(simde_mm_sub_pd(one, a), simde_mm_add_pd(one, a))); - - simde__m128d tt1 = simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_MATH_PI), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.147))); - tt1 = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1); - tt1 = simde_mm_add_pd(tt1, simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx)); - - simde__m128d tt2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147)); - tt2 = simde_mm_mul_pd(tt2, lnx); - - simde__m128d r = simde_mm_mul_pd(tt1, tt1); - r = simde_mm_sub_pd(r, tt2); - r = simde_mm_sqrt_pd(r); - r = simde_mm_add_pd(simde_x_mm_negate_pd(tt1), r); - r = simde_mm_sqrt_pd(r); - - return simde_x_mm_xorsign_pd(r, a); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_erfinv(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_erfinv_pd - #define _mm_erfinv_pd(a) simde_mm_erfinv_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_erfinv_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_erfinv_ps(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)); - simde__m256 sgn = simde_x_mm256_copysign_ps(one, a); - - a = simde_mm256_mul_ps(simde_mm256_sub_ps(one, a), simde_mm256_add_ps(one, a)); - simde__m256 lnx = simde_mm256_log_ps(a); - - simde__m256 tt1 = simde_mm256_mul_ps(simde_mm256_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.147))); - tt1 = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1); - tt1 = simde_mm256_add_ps(tt1, simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx)); - - simde__m256 tt2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147)); - tt2 = simde_mm256_mul_ps(tt2, lnx); - - simde__m256 r = simde_mm256_mul_ps(tt1, tt1); - r = simde_mm256_sub_ps(r, tt2); - r = simde_mm256_sqrt_ps(r); - r = simde_mm256_add_ps(simde_x_mm256_negate_ps(tt1), r); - r = simde_mm256_sqrt_ps(r); - - return simde_mm256_mul_ps(sgn, r); - #else - simde__m256_private - a_ = simde__m256_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_erfinvf(a_.f32[i]); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_erfinv_ps - #define _mm256_erfinv_ps(a) 
simde_mm256_erfinv_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_erfinv_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_erfinv_pd(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)); - simde__m256d sgn = simde_x_mm256_copysign_pd(one, a); - - a = simde_mm256_mul_pd(simde_mm256_sub_pd(one, a), simde_mm256_add_pd(one, a)); - simde__m256d lnx = simde_mm256_log_pd(a); - - simde__m256d tt1 = simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_MATH_PI), simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.147))); - tt1 = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1); - tt1 = simde_mm256_add_pd(tt1, simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx)); - - simde__m256d tt2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147)); - tt2 = simde_mm256_mul_pd(tt2, lnx); - - simde__m256d r = simde_mm256_mul_pd(tt1, tt1); - r = simde_mm256_sub_pd(r, tt2); - r = simde_mm256_sqrt_pd(r); - r = simde_mm256_add_pd(simde_x_mm256_negate_pd(tt1), r); - r = simde_mm256_sqrt_pd(r); - - return simde_mm256_mul_pd(sgn, r); - #else - simde__m256d_private - a_ = simde__m256d_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_erfinv(a_.f64[i]); - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_erfinv_pd - #define _mm256_erfinv_pd(a) simde_mm256_erfinv_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_erfinv_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_erfinv_ps(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)); - simde__m512 sgn = simde_x_mm512_copysign_ps(one, a); - - a = simde_mm512_mul_ps(simde_mm512_sub_ps(one, a), simde_mm512_add_ps(one, a)); - simde__m512 lnx = simde_mm512_log_ps(a); - - simde__m512 tt1 = simde_mm512_mul_ps(simde_mm512_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.147))); - tt1 = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1); - tt1 = simde_mm512_add_ps(tt1, simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx)); - - simde__m512 tt2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147)); - tt2 = simde_mm512_mul_ps(tt2, lnx); - - simde__m512 r = simde_mm512_mul_ps(tt1, tt1); - r = simde_mm512_sub_ps(r, tt2); - r = simde_mm512_sqrt_ps(r); - r = simde_mm512_add_ps(simde_x_mm512_negate_ps(tt1), r); - r = simde_mm512_sqrt_ps(r); - - return simde_mm512_mul_ps(sgn, r); - #else - simde__m512_private - a_ = simde__m512_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_erfinvf(a_.f32[i]); - } - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_erfinv_ps - #define _mm512_erfinv_ps(a) simde_mm512_erfinv_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_erfinv_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_erfinv_pd(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)); - simde__m512d sgn = simde_x_mm512_copysign_pd(one, a); - - a = 
simde_mm512_mul_pd(simde_mm512_sub_pd(one, a), simde_mm512_add_pd(one, a)); - simde__m512d lnx = simde_mm512_log_pd(a); - - simde__m512d tt1 = simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_MATH_PI), simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.147))); - tt1 = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1); - tt1 = simde_mm512_add_pd(tt1, simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx)); - - simde__m512d tt2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147)); - tt2 = simde_mm512_mul_pd(tt2, lnx); - - simde__m512d r = simde_mm512_mul_pd(tt1, tt1); - r = simde_mm512_sub_pd(r, tt2); - r = simde_mm512_sqrt_pd(r); - r = simde_mm512_add_pd(simde_x_mm512_negate_pd(tt1), r); - r = simde_mm512_sqrt_pd(r); - - return simde_mm512_mul_pd(sgn, r); - #else - simde__m512d_private - a_ = simde__m512d_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_erfinv(a_.f64[i]); - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_erfinv_pd - #define _mm512_erfinv_pd(a) simde_mm512_erfinv_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_erfinv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_erfinv_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfinv_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_erfinv_ps - #define _mm512_mask_erfinv_ps(src, k, a) simde_mm512_mask_erfinv_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_erfinv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_erfinv_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfinv_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_erfinv_pd - #define _mm512_mask_erfinv_pd(src, k, a) simde_mm512_mask_erfinv_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_erfcinv_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_erfcinv_ps(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - simde__m128 matched, retval = simde_mm_setzero_ps(); - - { /* if (a < 2.0f && a > 0.0625f) */ - matched = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))); - matched = simde_mm_and_ps(matched, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625)))); - - if (!simde_mm_test_all_zeros(simde_mm_castps_si128(matched), simde_x_mm_setone_si128())) { - retval = simde_mm_erfinv_ps(simde_mm_sub_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), a)); - } - - if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) { - return retval; - } - } - - { /* else if (a < 0.0625f && a > 0.0f) */ - simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625))); - mask = simde_mm_and_ps(mask, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0)))); - mask = simde_mm_andnot_ps(matched, mask); - - if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) { - matched = simde_mm_or_ps(matched, mask); - - /* t = 1/(sqrt(-log(a))) */ - simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a)); - t = simde_mm_sqrt_ps(t); - t = 
simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t); - - const simde__m128 p[] = { - simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)), - simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)), - simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)), - simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)), - simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)), - simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.164441567910)) - }; - - const simde__m128 q[] = { - simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)), - simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)), - simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.000000000000)) - }; - - /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ - simde__m128 numerator = simde_mm_fmadd_ps(p[5], t, p[4]); - numerator = simde_mm_fmadd_ps(numerator, t, p[3]); - numerator = simde_mm_fmadd_ps(numerator, t, p[2]); - numerator = simde_mm_fmadd_ps(numerator, t, p[1]); - numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t)); - - /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ - simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]); - denominator = simde_mm_fmadd_ps(denominator, t, q[0]); - - simde__m128 res = simde_mm_div_ps(numerator, denominator); - - retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); - } - } - - { /* else if (a < 0.0f) */ - simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))); - mask = simde_mm_andnot_ps(matched, mask); - - if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) { - matched = simde_mm_or_ps(matched, mask); - - /* t = 1/(sqrt(-log(a))) */ - simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a)); - t = simde_mm_sqrt_ps(t); - t = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t); - - const simde__m128 p[] = { - simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)), - simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)), - simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)), - simde_mm_set1_ps(SIMDE_FLOAT32_C( -0.5374947401000)) - }; - - const simde__m128 q[] = { - simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)), - simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)), - simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000)) - }; - - /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ - simde__m128 numerator = simde_mm_fmadd_ps(p[3], t, p[2]); - numerator = simde_mm_fmadd_ps(numerator, t, p[1]); - numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t)); - - /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ - simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]); - denominator = simde_mm_fmadd_ps(denominator, t, q[0]); - - simde__m128 res = simde_mm_div_ps(numerator, denominator); - - retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); - - if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) { - return retval; - } - } - } - - { /* else if (a == 0.0f) */ - simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))); - mask = simde_mm_andnot_ps(matched, mask); - matched = simde_mm_or_ps(matched, mask); - - simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF); - - retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); - } - - { /* else */ - /* (a >= 2.0f) */ - retval = simde_mm_or_ps(retval, simde_mm_andnot_ps(matched, simde_mm_set1_ps(-SIMDE_MATH_INFINITYF))); - } - - return retval; - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / 
sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_erfcinvf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_erfcinv_ps - #define _mm_erfcinv_ps(a) simde_mm_erfcinv_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_erfcinv_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_erfcinv_pd(a); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - simde__m128d matched, retval = simde_mm_setzero_pd(); - - { /* if (a < 2.0 && a > 0.0625) */ - matched = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0))); - matched = simde_mm_and_pd(matched, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625)))); - - if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(matched), simde_x_mm_setone_si128())) { - retval = simde_mm_erfinv_pd(simde_mm_sub_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), a)); - } - - if (simde_mm_test_all_ones(simde_mm_castpd_si128(matched))) { - return retval; - } - } - - { /* else if (a < 0.0625 && a > 0.0) */ - simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625))); - mask = simde_mm_and_pd(mask, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0)))); - mask = simde_mm_andnot_pd(matched, mask); - - if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) { - matched = simde_mm_or_pd(matched, mask); - - /* t = 1/(sqrt(-log(a))) */ - simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a)); - t = simde_mm_sqrt_pd(t); - t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), t); - - const simde__m128d p[] = { - simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)), - simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)), - simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)), - simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)), - simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)), - simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.164441567910)) - }; - - const simde__m128d q[] = { - simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)), - simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)), - simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.000000000000)) - }; - - /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ - simde__m128d numerator = simde_mm_fmadd_pd(p[5], t, p[4]); - numerator = simde_mm_fmadd_pd(numerator, t, p[3]); - numerator = simde_mm_fmadd_pd(numerator, t, p[2]); - numerator = simde_mm_fmadd_pd(numerator, t, p[1]); - numerator = simde_mm_add_pd(numerator, simde_mm_div_pd(p[0], t)); - - /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ - simde__m128d denominator = simde_mm_fmadd_pd(q[2], t, q[1]); - denominator = simde_mm_fmadd_pd(denominator, t, q[0]); - - simde__m128d res = simde_mm_div_pd(numerator, denominator); - - retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); - } - } - - { /* else if (a < 0.0) */ - simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))); - mask = simde_mm_andnot_pd(matched, mask); - - if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) { - matched = simde_mm_or_pd(matched, mask); - - /* t = 1/(sqrt(-log(a))) */ - simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a)); - t = simde_mm_sqrt_pd(t); - t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), t); - - const simde__m128d p[] = { - simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)), - simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)), - simde_mm_set1_pd(SIMDE_FLOAT64_C( 
[Deleted vendored SIMDE SVML header (simde/x86/svml.h): verbatim removal hunks covering the portable fallback implementations of the _mm/_mm256/_mm512 erfcinv, logb, log2, log1p, log10, nearbyint, pow, clog, csqrt, and rem_epi*/rem_epu* intrinsics, dropped along with the rest of the bundled simde sources now that the package no longer depends on them.]
- - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i8 = a_.i8 % b_.i8; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epi8(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] % b_.i8[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epi8 - #define _mm512_rem_epi8(a, b) simde_mm512_rem_epi8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i16 = a_.i16 % b_.i16; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epi16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] % b_.i16[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epi16 - #define _mm512_rem_epi16(a, b) simde_mm512_rem_epi16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i32 = a_.i32 % b_.i32; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epi32(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] % b_.i32[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epi32 - #define _mm512_rem_epi32(a, b) simde_mm512_rem_epi32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_rem_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_rem_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_rem_epi32 - #define _mm512_mask_rem_epi32(src, k, a, b) simde_mm512_mask_rem_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.i64 = a_.i64 % b_.i64; - #else 
- #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epi64(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epi64 - #define _mm512_rem_epi64(a, b) simde_mm512_rem_epi64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epu8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epu8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u8 = a_.u8 % b_.u8; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epu8(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a_.u8[i] % b_.u8[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epu8 - #define _mm512_rem_epu8(a, b) simde_mm512_rem_epu8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epu16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epu16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u16 = a_.u16 % b_.u16; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epu16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] % b_.u16[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epu16 - #define _mm512_rem_epu16(a, b) simde_mm512_rem_epu16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epu32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epu32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u32 = a_.u32 % b_.u32; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epu32(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] % b_.u32[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epu32 - #define _mm512_rem_epu32(a, b) simde_mm512_rem_epu32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_rem_epu32(simde__m512i src, 
simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_rem_epu32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_rem_epu32 - #define _mm512_mask_rem_epu32(src, k, a, b) simde_mm512_mask_rem_epu32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_rem_epu64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rem_epu64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) - r_.u64 = a_.u64 % b_.u64; - #else - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_rem_epu64(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] % b_.u64[i]; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rem_epu64 - #define _mm512_rem_epu64(a, b) simde_mm512_rem_epu64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_recip_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_recip_ps(a); - #else - return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_recip_ps - #define _mm512_recip_ps(a) simde_mm512_recip_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_recip_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_recip_pd(a); - #else - return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_recip_pd - #define _mm512_recip_pd(a) simde_mm512_recip_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_recip_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_recip_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_recip_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_recip_ps - #define _mm512_mask_recip_ps(src, k, a) simde_mm512_mask_recip_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_recip_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_recip_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_recip_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_recip_pd - #define _mm512_mask_recip_pd(src, k, a) simde_mm512_mask_recip_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_rint_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rint_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_rintf16(a); - #else - 
simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_rintf(a_.f32[i]); - } - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rint_ps - #define _mm512_rint_ps(a) simde_mm512_rint_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_rint_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_rint_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_rintd8(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_rint(a_.f64[i]); - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_rint_pd - #define _mm512_rint_pd(a) simde_mm512_rint_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_rint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_rint_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_rint_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_rint_ps - #define _mm512_mask_rint_ps(src, k, a) simde_mm512_mask_rint_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_rint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_rint_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_rint_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_rint_pd - #define _mm512_mask_rint_pd(src, k, a) simde_mm512_mask_rint_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sin_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_sin_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sinf4_u10(a); - #else - return Sleef_sinf4_u35(a); - #endif - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sinf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_sin_ps - #define _mm_sin_ps(a) simde_mm_sin_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sin_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_sin_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sind2_u10(a); - #else - return Sleef_sind2_u35(a); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sin(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_sin_pd - #define _mm_sin_pd(a) simde_mm_sin_pd(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sin_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sin_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sinf8_u10(a); - #else - return Sleef_sinf8_u35(a); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_sin_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sinf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_sin_ps - #define _mm256_sin_ps(a) simde_mm256_sin_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sin_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sin_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sind4_u10(a); - #else - return Sleef_sind4_u35(a); - #endif - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_sin_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sin(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_sin_pd - #define _mm256_sin_pd(a) simde_mm256_sin_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_sin_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sin_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sinf16_u10(a); - #else - return Sleef_sinf16_u35(a); - #endif - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_sin_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sinf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_sin_ps - #define _mm512_sin_ps(a) simde_mm512_sin_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_sin_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sin_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sind8_u10(a); - #else - return Sleef_sind8_u35(a); - #endif - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_sin_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = 
simde_math_sin(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_sin_pd - #define _mm512_sin_pd(a) simde_mm512_sin_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_sin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sin_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_sin_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sin_ps - #define _mm512_mask_sin_ps(src, k, a) simde_mm512_mask_sin_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_sin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sin_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_sin_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sin_pd - #define _mm512_mask_sin_pd(src, k, a) simde_mm512_mask_sin_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sincos_ps (simde__m128* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_sincos_ps(HEDLEY_REINTERPRET_CAST(__m128*, mem_addr), a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - Sleef___m128_2 temp; - - #if SIMDE_ACCURACY_PREFERENCE > 1 - temp = Sleef_sincosf4_u10(a); - #else - temp = Sleef_sincosf4_u35(a); - #endif - - *mem_addr = temp.y; - return temp.x; - #else - simde__m128 r; - - r = simde_mm_sin_ps(a); - *mem_addr = simde_mm_cos_ps(a); - - return r; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_sincos_ps - #define _mm_sincos_ps(mem_addr, a) simde_mm_sincos_ps((mem_addr),(a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sincos_pd (simde__m128d* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_sincos_pd(HEDLEY_REINTERPRET_CAST(__m128d*, mem_addr), a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - Sleef___m128d_2 temp; - - #if SIMDE_ACCURACY_PREFERENCE > 1 - temp = Sleef_sincosd2_u10(a); - #else - temp = Sleef_sincosd2_u35(a); - #endif - - *mem_addr = temp.y; - return temp.x; - #else - simde__m128d r; - - r = simde_mm_sin_pd(a); - *mem_addr = simde_mm_cos_pd(a); - - return r; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_sincos_pd - #define _mm_sincos_pd(mem_addr, a) simde_mm_sincos_pd((mem_addr),(a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sincos_ps (simde__m256* mem_addr, simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sincos_ps(HEDLEY_REINTERPRET_CAST(__m256*, mem_addr), a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - Sleef___m256_2 temp; - - #if SIMDE_ACCURACY_PREFERENCE > 1 - temp = Sleef_sincosf8_u10(a); - #else - temp = Sleef_sincosf8_u35(a); - #endif - - *mem_addr = temp.y; - return temp.x; - #else - simde__m256 r; - - r = simde_mm256_sin_ps(a); - *mem_addr = simde_mm256_cos_ps(a); - - return r; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_sincos_ps - #define _mm256_sincos_ps(mem_addr, a) simde_mm256_sincos_ps((mem_addr),(a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m256d -simde_mm256_sincos_pd (simde__m256d* mem_addr, simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sincos_pd(HEDLEY_REINTERPRET_CAST(__m256d*, mem_addr), a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - Sleef___m256d_2 temp; - - #if SIMDE_ACCURACY_PREFERENCE > 1 - temp = Sleef_sincosd4_u10(a); - #else - temp = Sleef_sincosd4_u35(a); - #endif - - *mem_addr = temp.y; - return temp.x; - #else - simde__m256d r; - - r = simde_mm256_sin_pd(a); - *mem_addr = simde_mm256_cos_pd(a); - - return r; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_sincos_pd - #define _mm256_sincos_pd(mem_addr, a) simde_mm256_sincos_pd((mem_addr),(a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_sincos_ps (simde__m512* mem_addr, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sincos_ps(HEDLEY_REINTERPRET_CAST(__m512*, mem_addr), a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - Sleef___m512_2 temp; - - #if SIMDE_ACCURACY_PREFERENCE > 1 - temp = Sleef_sincosf16_u10(a); - #else - temp = Sleef_sincosf16_u35(a); - #endif - - *mem_addr = temp.y; - return temp.x; - #else - simde__m512 r; - - r = simde_mm512_sin_ps(a); - *mem_addr = simde_mm512_cos_ps(a); - - return r; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_sincos_ps - #define _mm512_sincos_ps(mem_addr, a) simde_mm512_sincos_ps((mem_addr),(a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_sincos_pd (simde__m512d* mem_addr, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sincos_pd(HEDLEY_REINTERPRET_CAST(__m512d*, mem_addr), a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - Sleef___m512d_2 temp; - - #if SIMDE_ACCURACY_PREFERENCE > 1 - temp = Sleef_sincosd8_u10(a); - #else - temp = Sleef_sincosd8_u35(a); - #endif - - *mem_addr = temp.y; - return temp.x; - #else - simde__m512d r; - - r = simde_mm512_sin_pd(a); - *mem_addr = simde_mm512_cos_pd(a); - - return r; - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_sincos_pd - #define _mm512_sincos_pd(mem_addr, a) simde_mm512_sincos_pd((mem_addr),(a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_sincos_ps(simde__m512* mem_addr, simde__m512 sin_src, simde__m512 cos_src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a); - #else - simde__m512 cos_res, sin_res; - sin_res = simde_mm512_sincos_ps(&cos_res, a); - *mem_addr = simde_mm512_mask_mov_ps(cos_src, k, cos_res); - return simde_mm512_mask_mov_ps(sin_src, k, sin_res); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sincos_ps - #define _mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_sincos_pd(simde__m512d* mem_addr, simde__m512d sin_src, simde__m512d cos_src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a); - #else - simde__m512d cos_res, sin_res; - sin_res = simde_mm512_sincos_pd(&cos_res, a); - *mem_addr = 
simde_mm512_mask_mov_pd(cos_src, k, cos_res); - return simde_mm512_mask_mov_pd(sin_src, k, sin_res); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sincos_pd - #define _mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sind_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_sind_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sinf4_u10(simde_x_mm_deg2rad_ps(a)); - #else - return Sleef_sinf4_u35(simde_x_mm_deg2rad_ps(a)); - #endif - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i])); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_sind_ps - #define _mm_sind_ps(a) simde_mm_sind_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sind_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_sind_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sind2_u10(simde_x_mm_deg2rad_pd(a)); - #else - return Sleef_sind2_u35(simde_x_mm_deg2rad_pd(a)); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i])); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_sind_pd - #define _mm_sind_pd(a) simde_mm_sind_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sind_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sind_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sinf8_u10(simde_x_mm256_deg2rad_ps(a)); - #else - return Sleef_sinf8_u35(simde_x_mm256_deg2rad_ps(a)); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_sind_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i])); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_sind_ps - #define _mm256_sind_ps(a) simde_mm256_sind_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sind_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sind_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sind4_u10(simde_x_mm256_deg2rad_pd(a)); - #else - return Sleef_sind4_u35(simde_x_mm256_deg2rad_pd(a)); - #endif - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / 
sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_sind_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i])); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_sind_pd - #define _mm256_sind_pd(a) simde_mm256_sind_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_sind_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sind_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sinf16_u10(simde_x_mm512_deg2rad_ps(a)); - #else - return Sleef_sinf16_u35(simde_x_mm512_deg2rad_ps(a)); - #endif - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_sind_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i])); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_sind_ps - #define _mm512_sind_ps(a) simde_mm512_sind_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_sind_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sind_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - #if SIMDE_ACCURACY_PREFERENCE > 1 - return Sleef_sind8_u10(simde_x_mm512_deg2rad_pd(a)); - #else - return Sleef_sind8_u35(simde_x_mm512_deg2rad_pd(a)); - #endif - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_sind_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i])); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_sind_pd - #define _mm512_sind_pd(a) simde_mm512_sind_pd(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_sind_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sind_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_sind_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sind_ps - #define _mm512_mask_sind_ps(src, k, a) simde_mm512_mask_sind_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_sind_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sind_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_sind_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sind_pd - #define _mm512_mask_sind_pd(src, k, a) simde_mm512_mask_sind_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sinh_ps (simde__m128 a) { - 
#if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_sinh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_sinhf4_u10(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sinhf(a_.f32[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_sinh_ps - #define _mm_sinh_ps(a) simde_mm_sinh_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sinh_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_sinh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_sinhd2_u10(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sinh(a_.f64[i]); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_sinh_pd - #define _mm_sinh_pd(a) simde_mm_sinh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_sinh_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sinh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_sinhf8_u10(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_sinh_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sinhf(a_.f32[i]); - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_sinh_ps - #define _mm256_sinh_ps(a) simde_mm256_sinh_ps(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_sinh_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_sinh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_sinhd4_u10(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_sinh_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sinh(a_.f64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_sinh_pd - #define _mm256_sinh_pd(a) simde_mm256_sinh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_sinh_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sinh_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_sinhf16_u10(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_sinh_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 
0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sinhf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_sinh_ps - #define _mm512_sinh_ps(a) simde_mm512_sinh_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_sinh_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sinh_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_sinhd8_u10(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_sinh_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sinh(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_sinh_pd - #define _mm512_sinh_pd(a) simde_mm512_sinh_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_sinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sinh_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_sinh_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sinh_ps - #define _mm512_mask_sinh_ps(src, k, a) simde_mm512_mask_sinh_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_sinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sinh_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_sinh_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sinh_pd - #define _mm512_mask_sinh_pd(src, k, a) simde_mm512_mask_sinh_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_svml_ceil_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_svml_ceil_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_ceilf4(a); - #else - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_svml_ceil_ps - #define _mm_svml_ceil_ps(a) simde_mm_svml_ceil_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_svml_ceil_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_svml_ceil_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_ceild2(a); - #else - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_svml_ceil_pd - #define _mm_svml_ceil_pd(a) simde_mm_svml_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_svml_ceil_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_svml_ceil_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_ceilf8(a); - #else - return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); - #endif -} -#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_svml_ceil_ps - #define _mm256_svml_ceil_ps(a) simde_mm256_svml_ceil_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_svml_ceil_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_svml_ceil_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_ceild4(a); - #else - return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_svml_ceil_pd - #define _mm256_svml_ceil_pd(a) simde_mm256_svml_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_ceil_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_ceil_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_ceilf16(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_ceil_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_ceil_ps - #define _mm512_ceil_ps(a) simde_mm512_ceil_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_ceil_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_ceil_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_ceild8(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_ceil_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_ceil(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_ceil_pd - #define _mm512_ceil_pd(a) simde_mm512_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_ceil_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_ceil_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_ceil_ps(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_ceil_ps - #define _mm512_mask_ceil_ps(src, k, a) simde_mm512_mask_ceil_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_ceil_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_ceil_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_ceil_pd(a)); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_ceil_pd - #define _mm512_mask_ceil_pd(src, k, a) simde_mm512_mask_ceil_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_svml_floor_ps (simde__m128 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return 
_mm_svml_floor_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_floorf4(a); - #else - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_svml_floor_ps - #define _mm_svml_floor_ps(a) simde_mm_svml_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_svml_floor_pd (simde__m128d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) - return _mm_svml_floor_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_floord2(a); - #else - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm_svml_floor_pd - #define _mm_svml_floor_pd(a) simde_mm_svml_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_svml_floor_ps (simde__m256 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_svml_floor_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_floorf8(a); - #else - return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_svml_floor_ps - #define _mm256_svml_floor_ps(a) simde_mm256_svml_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_svml_floor_pd (simde__m256d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) - return _mm256_svml_floor_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_floord4(a); - #else - return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm256_svml_floor_pd - #define _mm256_svml_floor_pd(a) simde_mm256_svml_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_floor_ps (simde__m512 a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_floor_ps(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_floorf16(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_floor_ps(a_.m256[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) - #undef _mm512_floor_ps - #define _mm512_floor_ps(a) simde_mm512_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_floor_pd (simde__m512d a) { - #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_floor_pd(a); - #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_floord8(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_floor_pd(a_.m256d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_floor(a_.f64[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
-  #undef _mm512_floor_pd
-  #define _mm512_floor_pd(a) simde_mm512_floor_pd(a)
-#endif

[Remaining deleted simde/x86/svml.h fallbacks elided: the simde_mm512_mask_floor_p{s,d}, simde_mm*_svml_round_*, _svml_sqrt_*, _tan_*, _tand_*, _tanh_* and _trunc_* families for 128/256/512-bit ps/pd vectors, together with their _mm512_mask_* wrappers and SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES alias guards. Each follows the same dispatch as the functions above: the native SVML intrinsic when SIMDE_X86_SVML_NATIVE is defined, else a Sleef call (u10/u35 variant chosen by SIMDE_ACCURACY_PREFERENCE) when SIMDE_MATH_SLEEF_ENABLE is defined, else a scalar simde_math_* loop or a split onto narrower vectors under SIMDE_NATURAL_VECTOR_SIZE_LE. The _tand_* variants first convert degrees to radians via simde_x_mm*_deg2rad_*, and the 128/256-bit _trunc_* variants delegate to simde_mm*_round_* with SIMDE_MM_FROUND_TO_ZERO.]
-#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
-  #undef _mm512_mask_trunc_pd
-  #define _mm512_mask_trunc_pd(src, k, a) simde_mm512_mask_trunc_pd(src, k, a)
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m128i
-simde_mm_udivrem_epi32 (simde__m128i * mem_addr, simde__m128i a, simde__m128i b) {
-  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
-    return _mm_udivrem_epi32(mem_addr, a, b);
-  #else
-    simde__m128i r;
-
-    r = simde_mm_div_epu32(a, b);
-    *mem_addr = simde_x_mm_sub_epu32(a, simde_x_mm_mullo_epu32(r, b));
-
-    return r;
-  #endif
-}
-#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
-  #undef _mm_udivrem_epi32
-  #define _mm_udivrem_epi32(mem_addr, a, b) simde_mm_udivrem_epi32((mem_addr),(a), (b))
-#endif
-
-SIMDE_FUNCTION_ATTRIBUTES
-simde__m256i
-simde_mm256_udivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) {
-  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
-    return _mm256_udivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b);
-  #else
-    simde__m256i r;
-
-    r = simde_mm256_div_epu32(a, b);
-    *mem_addr = simde_x_mm256_sub_epu32(a, simde_x_mm256_mullo_epu32(r, b));
-
-    return r;
-  #endif
-}
-#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
-  #undef _mm256_udivrem_epi32
-  #define _mm256_udivrem_epi32(mem_addr, a, b) simde_mm256_udivrem_epi32((mem_addr),(a), (b))
-#endif
-
-SIMDE_END_DECLS_
-
-HEDLEY_DIAGNOSTIC_POP
-
-#endif /* !defined(SIMDE_X86_SVML_H) */
-/* :: End simde/x86/svml.h :: */
diff --git a/src/simde/x86/xop.h b/src/simde/x86/xop.h
deleted file mode 100644
index 5b30b7d13..000000000
--- a/src/simde/x86/xop.h
+++ /dev/null
@@ -1,43924 +0,0 @@
-/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
-/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */
-/* :: Begin simde/x86/xop.h :: */
-/* SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright:
- *   2020 Evan Nemerson
- */
-
-#if !defined(SIMDE_X86_XOP_H)
-#define SIMDE_X86_XOP_H
-
-/* :: Begin simde/x86/avx2.h :: */
[Same MIT license text and generated-file banner as above, elided; Copyright 2018-2020 Evan Nemerson, 2019-2020 Michael R. Crusoe, 2020 Himanshi Mathur, 2020 Hidayat Khan.]
-
-#if !defined(SIMDE_X86_AVX2_H)
-#define SIMDE_X86_AVX2_H
-
-/* :: Begin simde/x86/avx.h :: */
[Same MIT license text and generated-file banner as above, elided; Copyright 2018-2020 Evan Nemerson, 2020 Michael R. Crusoe.]
-
-/* :: Begin simde/x86/sse.h :: */
[Same MIT license text and generated-file banner as above, elided; Copyright 2017-2020 Evan Nemerson, 2015-2017 John W. Ratcliff, 2015 Brandon Rowlett, 2015 Ken Fast.]
-
-#if !defined(SIMDE_X86_SSE_H)
-#define SIMDE_X86_SSE_H
-
-/* :: Begin simde/x86/mmx.h :: */
[Same MIT license text and generated-file banner as above, elided; Copyright 2017-2020 Evan Nemerson.]
-
-#if !defined(SIMDE_X86_MMX_H)
-#define SIMDE_X86_MMX_H
-
-/* :: Begin simde/simde-common.h :: */
[Same MIT license text and generated-file banner as above, elided; Copyright 2017-2020 Evan Nemerson, 2023 Yi-Yen Chung (Copyright owned by Andes Technology).]
-
-#if !defined(SIMDE_COMMON_H)
-#define SIMDE_COMMON_H
-
-/* :: Begin simde/hedley.h :: */
-/* Hedley - https://nemequ.github.io/hedley
- * Created by Evan Nemerson
- *
- * To the extent possible under law, the author(s) have dedicated all
- * copyright and related and neighboring rights to this software to
- * the public domain worldwide. This software is distributed without
- * any warranty.
- *
- * For details, see .
- * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) -#if defined(HEDLEY_VERSION) -# undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 16 - -#if defined(HEDLEY_STRINGIFY_EX) -# undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -# undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -# undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(HEDLEY_CONCAT) -# undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) - -#if defined(HEDLEY_CONCAT3_EX) -# undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(HEDLEY_CONCAT3) -# undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(HEDLEY_VERSION_ENCODE) -# undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -# undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -# undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -# undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -# undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -# undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -# undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -# undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(HEDLEY_INTEL_VERSION) -# undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -# undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -# undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) -# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -# undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -# undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) -# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -# undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -# undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) -# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -# undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_ARM_VERSION) -# undef HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) -# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(HEDLEY_ARM_VERSION_CHECK) -# undef HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(HEDLEY_ARM_VERSION) -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IBM_VERSION) -# undef HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) -# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(HEDLEY_IBM_VERSION_CHECK) -# undef HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(HEDLEY_IBM_VERSION) -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_VERSION) -# undef HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -# if (__TI_COMPILER_VERSION__ >= 16000000) -# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -# endif -#endif - -#if defined(HEDLEY_TI_VERSION_CHECK) -# undef HEDLEY_TI_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_VERSION) -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION) -# undef HEDLEY_TI_CL2000_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) -# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) -# undef HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL2000_VERSION) -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION) -# undef HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) -# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
-# undef HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL430_VERSION) -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION) -# undef HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) -# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) -# undef HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_ARMCL_VERSION) -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION) -# undef HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) -# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) -# undef HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL6X_VERSION) -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION) -# undef HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) -# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) -# undef HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL7X_VERSION) -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION) -# undef HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) -# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) -# undef HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CLPRU_VERSION) -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_CRAY_VERSION) -# undef HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) -# if defined(_RELEASE_PATCHLEVEL) -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) -# else -# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) -# endif -#endif - -#if defined(HEDLEY_CRAY_VERSION_CHECK) -# undef HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(HEDLEY_CRAY_VERSION) -# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_IAR_VERSION) -# undef HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) -# if __VER__ > 1000 -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) -# else -# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) -# endif -#endif - -#if defined(HEDLEY_IAR_VERSION_CHECK) -# undef HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(HEDLEY_IAR_VERSION) -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_TINYC_VERSION) -# undef HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) -# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) -#endif - -#if defined(HEDLEY_TINYC_VERSION_CHECK) -# undef HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(HEDLEY_TINYC_VERSION) -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_DMC_VERSION) -# undef HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) -# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if defined(HEDLEY_DMC_VERSION_CHECK) -# undef HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(HEDLEY_DMC_VERSION) -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION) -# undef HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) -# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION_CHECK) -# undef HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(HEDLEY_COMPCERT_VERSION) -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_PELLES_VERSION) -# undef HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) -# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if defined(HEDLEY_PELLES_VERSION_CHECK) -# undef HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(HEDLEY_PELLES_VERSION) -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION) -# undef HEDLEY_MCST_LCC_VERSION -#endif -#if defined(__LCC__) && defined(__LCC_MINOR__) -# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) -#endif - -#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) -# undef HEDLEY_MCST_LCC_VERSION_CHECK -#endif -#if defined(HEDLEY_MCST_LCC_VERSION) -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(HEDLEY_GCC_VERSION) -# undef HEDLEY_GCC_VERSION 
[Deleted: vendored SIMDe header bundle, continued (SIMDe 0.8.0, amalgamation revision
 589c7d599ae2213823acc4334a3ae8ef8caefe18). The diff lines in this hunk were fused and
 their angle-bracket includes/URLs stripped during extraction; only a summary of the
 removed content is kept here. The hunk covers:

   * simde/hedley.h -- Evan Nemerson's Hedley compiler-portability macros (CC0-1.0):
     HEDLEY_GCC_VERSION and the *_VERSION_CHECK family, wrappers for __has_attribute,
     __has_builtin, __has_cpp_attribute, __has_feature, __has_extension,
     __has_declspec_attribute and __has_warning, HEDLEY_PRAGMA,
     HEDLEY_DIAGNOSTIC_PUSH/POP and the DISABLE_* helpers, cast helpers
     (HEDLEY_CONST_CAST, HEDLEY_STATIC_CAST, ...), HEDLEY_DEPRECATED,
     HEDLEY_WARN_UNUSED_RESULT, HEDLEY_SENTINEL, HEDLEY_NO_RETURN,
     HEDLEY_UNREACHABLE/ASSUME, HEDLEY_PREDICT/LIKELY/UNLIKELY,
     HEDLEY_MALLOC/PURE/CONST, HEDLEY_RESTRICT, HEDLEY_INLINE/ALWAYS_INLINE/NEVER_INLINE,
     visibility macros (HEDLEY_PUBLIC/PRIVATE/IMPORT), HEDLEY_NO_THROW,
     HEDLEY_FALL_THROUGH, HEDLEY_STATIC_ASSERT, HEDLEY_NULL, HEDLEY_MESSAGE/WARNING,
     HEDLEY_REQUIRE, HEDLEY_FLAGS, HEDLEY_EMPTY_BASES, and the deprecated
     HEDLEY_CLANG_HAS_* aliases.

   * simde/simde-detect-clang.h -- infers the upstream Clang version
     (SIMDE_DETECT_CLANG_VERSION, back to 3.6) from __has_attribute/__has_warning
     feature probes, because vendor compilers such as Apple Clang report their own
     version numbers; defines SIMDE_DETECT_CLANG_VERSION_CHECK and
     SIMDE_DETECT_CLANG_VERSION_NOT.

   * simde/simde-arch.h -- architecture-detection macros (SIMDE_ARCH_ALPHA, _AVR,
     _AMD64, _ARM/_AARCH64 with NEON/SVE/FMA/CRYPTO/QRDMX feature flags, _BLACKFIN,
     _CRIS, _CONVEX, _EPIPHANY, ...); removal of this header continues past this
     excerpt.]
SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -# define SIMDE_ARCH_H8300 -#endif - -/* Elbrus (8S, 8SV and successors) - */ -#if defined(__e2k__) -# define SIMDE_ARCH_E2K -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) -# define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -# define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -# define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -# define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -# define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -# define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -# define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -# define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -# define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -# define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -# define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -# define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -# define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 and Elbrus */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) -# if defined(_M_IX86_FP) -# define SIMDE_ARCH_X86_MMX -# if (_M_IX86_FP >= 1) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if (_M_IX86_FP >= 2) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# elif defined(_M_X64) -# define SIMDE_ARCH_X86_SSE 1 -# define SIMDE_ARCH_X86_SSE2 1 -# else -# if defined(__MMX__) -# define SIMDE_ARCH_X86_MMX 1 -# endif -# if defined(__SSE__) -# define SIMDE_ARCH_X86_SSE 1 -# endif -# if defined(__SSE2__) -# define SIMDE_ARCH_X86_SSE2 1 -# endif -# endif -# if defined(__SSE3__) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if defined(__SSSE3__) -# define SIMDE_ARCH_X86_SSSE3 1 -# endif -# if defined(__SSE4_1__) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if defined(__SSE4_2__) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# if defined(__XOP__) -# define SIMDE_ARCH_X86_XOP 1 -# endif -# if defined(__AVX__) -# define SIMDE_ARCH_X86_AVX 1 -# if !defined(SIMDE_ARCH_X86_SSE3) -# define SIMDE_ARCH_X86_SSE3 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_1) -# define SIMDE_ARCH_X86_SSE4_1 1 -# endif -# if !defined(SIMDE_ARCH_X86_SSE4_2) -# define SIMDE_ARCH_X86_SSE4_2 1 -# endif -# endif -# if defined(__AVX2__) -# define SIMDE_ARCH_X86_AVX2 1 -# if defined(_MSC_VER) -# define SIMDE_ARCH_X86_FMA 1 -# endif -# endif -# if defined(__FMA__) -# define SIMDE_ARCH_X86_FMA 1 -# if !defined(SIMDE_ARCH_X86_AVX) -# define SIMDE_ARCH_X86_AVX 1 -# endif -# endif -# if defined(__AVX512VP2INTERSECT__) -# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -# endif -# if defined(__AVX512BITALG__) -# define SIMDE_ARCH_X86_AVX512BITALG 1 -# endif -# if defined(__AVX512VPOPCNTDQ__) -# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 -# endif -# if defined(__AVX512VBMI__) -# define SIMDE_ARCH_X86_AVX512VBMI 1 -# endif -# if defined(__AVX512VBMI2__) -# define SIMDE_ARCH_X86_AVX512VBMI2 1 -# endif -# if defined(__AVX512VNNI__) -# define SIMDE_ARCH_X86_AVX512VNNI 1 -# endif -# if 
defined(__AVX5124VNNIW__) -# define SIMDE_ARCH_X86_AVX5124VNNIW 1 -# endif -# if defined(__AVX512BW__) -# define SIMDE_ARCH_X86_AVX512BW 1 -# endif -# if defined(__AVX512BF16__) -# define SIMDE_ARCH_X86_AVX512BF16 1 -# endif -# if defined(__AVX512CD__) -# define SIMDE_ARCH_X86_AVX512CD 1 -# endif -# if defined(__AVX512DQ__) -# define SIMDE_ARCH_X86_AVX512DQ 1 -# endif -# if defined(__AVX512F__) -# define SIMDE_ARCH_X86_AVX512F 1 -# endif -# if defined(__AVX512VL__) -# define SIMDE_ARCH_X86_AVX512VL 1 -# endif -# if defined(__AVX512FP16__) -# define SIMDE_ARCH_X86_AVX512FP16 1 -# endif -# if defined(__GFNI__) -# define SIMDE_ARCH_X86_GFNI 1 -# endif -# if defined(__PCLMUL__) -# define SIMDE_ARCH_X86_PCLMUL 1 -# endif -# if defined(__VPCLMULQDQ__) -# define SIMDE_ARCH_X86_VPCLMULQDQ 1 -# endif -# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) -# define SIMDE_ARCH_X86_F16C 1 -# endif -# if defined(__AES__) -# define SIMDE_ARCH_X86_AES 1 -# endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -# define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -# define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -# define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -# define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -# define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -# define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -# define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -# define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -# define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -# define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -# define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -# define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -# define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -# define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -# define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -# define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -# define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -# define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -# define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -# define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -#if defined(__mips_msa) -# define SIMDE_ARCH_MIPS_MSA 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -# define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -# define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -# define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -# define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -# define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -# define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -# define 
SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -# define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -# define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -# define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -# define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -# define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -# define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -# define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -# define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) -# define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) - #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else - #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -#if defined(__riscv) && __riscv_xlen==64 -# define SIMDE_ARCH_RISCV64 -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -# define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -# define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -# define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -# define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -# define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -# define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -# define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -# define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -# define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -# define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) - #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else - #define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -# define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -# define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -# define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -# define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -# define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -# define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define SIMDE_ARCH_ZARCH __ARCH__ -#endif -#if defined(SIMDE_ARCH_ZARCH) - #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) -#else - #define SIMDE_ARCH_ZARCH_CHECK(version) (0) -#endif - -#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) - #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -# define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -# define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -# define SIMDE_ARCH_TMS320 6700 -#elif 
defined(_TMS320C6600) || defined(__TMS320C6600__) -# define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -# define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -# define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -# define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -# define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -# define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -# define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) - #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else - #define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -# define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -# define SIMDE_ARCH_WASM_SIMD128 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_relaxed_simd__) -# define SIMDE_ARCH_WASM_RELAXED_SIMD -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -# define SIMDE_ARCH_XTENSA 1 -#endif - -/* Availability of 16-bit floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_FP16 -#endif - -/* Availability of 16-bit brain floating-point arithmetic intrinsics */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -# define SIMDE_ARCH_ARM_NEON_BF16 -#endif - -/* LoongArch - */ -#if defined(__loongarch32) -# define SIMDE_ARCH_LOONGARCH 1 -#elif defined(__loongarch64) -# define SIMDE_ARCH_LOONGARCH 2 -#endif - -/* LSX: LoongArch 128-bits SIMD extension */ -#if defined(__loongarch_sx) -# define SIMDE_ARCH_LOONGARCH_LSX 1 -#endif - -/* LASX: LoongArch 256-bits SIMD extension */ -#if defined(__loongarch_asx) -# define SIMDE_ARCH_LOONGARCH_LASX 2 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ -/* :: End simde/simde-arch.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-features.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. 
However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-diagnostic.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* This is only to help us implement functions like _mm_undefined_ps. 
*/ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") -#elif \ - HEDLEY_TI_VERSION_CHECK(16,9,9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) - #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. 
*/ -#if \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ -#endif - -/* This warning needs a lot of work. It is triggered if all you do is - * pass the value to memcpy/__builtin_memcpy, or if you initialize a - * member of the union, even if that member takes up the entire union. - * Last tested with clang-10, hopefully things will improve in the - * future; if clang fixes this I'd love to enable it. */ -#if \ - HEDLEY_HAS_WARNING("-Wconditional-uninitialized") - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ -#endif - -/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which - * will is false. However, SIMDe uses these operations exclusively - * for things like _mm_cmpeq_ps, for which we really do want to check - * for equality (or inequality). - * - * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro - * which just wraps a check in some code do disable this diagnostic I'd - * be happy to accept it. */ -#if \ - HEDLEY_HAS_WARNING("-Wfloat-equal") || \ - HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ -#endif - -/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. - * If Hedley can't find an implementation it will preprocess to - * nothing, which means there will be a trailing semi-colon. */ -#if HEDLEY_HAS_WARNING("-Wextra-semi") - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") -#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ -#endif - -/* We do use a few variadic macros, which technically aren't available - * until C99 and C++11, but every compiler I'm aware of has supported - * them for much longer. That said, usage is isolated to the test - * suite and compilers known to support them. */ -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) - #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#endif - -/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro - * before we can access certain SIMD intrinsics, but this diagnostic - * warns about it being a reserved name. It is a reserved name, but - * it's reserved for the compiler and we are using it to convey - * information to the compiler. - * - * This is also used when enabling native aliases since we don't get to - * choose the macro names. 
*/ -#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#endif - -/* Similar to above; types like simde__m128i are reserved due to the - * double underscore, but we didn't choose them, Intel did. */ -#if HEDLEY_HAS_WARNING("-Wreserved-identifier") - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ -#endif - -/* clang 3.8 warns about the packed attribute being unnecessary when - * used in the _mm_loadu_* functions. That *may* be true for version - * 3.8, but for later versions it is crucial in order to make unaligned - * access safe. */ -#if HEDLEY_HAS_WARNING("-Wpacked") - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ -#endif - -/* Triggered when assigning a float to a double implicitly. We use - * explicit casts in SIMDe, this is only used in the test suite. */ -#if HEDLEY_HAS_WARNING("-Wdouble-promotion") - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -/* Several compilers treat conformant array parameters as VLAs. We - * test to make sure we're in C mode (C++ doesn't support CAPs), and - * that the version of the standard supports CAPs. We also reject - * some buggy compilers like MSVC (the logic is in Hedley if you want - * to take a look), but with certain warnings enabled some compilers - * still like to emit a diagnostic. */ -#if HEDLEY_HAS_WARNING("-Wvla") - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ -#endif - -/* If you add an unused attribute to a function and don't use it, clang - * may emit this. 
*/ -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpass-failed") - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpadded") - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ -#endif - -#if HEDLEY_HAS_WARNING("-Wold-style-cast") - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ -#endif - -#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ -#endif - -/* clang will emit this warning when we use C99 extensions whan not in - * C99 mode, even though it does support this. In such cases we check - * the compiler and version first, so we know it's not a problem. */ -#if HEDLEY_HAS_WARNING("-Wc99-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -#endif - -/* Similar problm as above; we rely on some basic C99 support, but clang - * has started warning obut this even in C17 mode with -Weverything. */ -#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ -#endif - -/* https://github.com/simd-everywhere/simde/issues/277 */ -#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ -#endif - -/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS - * to silence, but you have to do that before including anything and - * that would require reordering includes. */ -#if defined(_MSC_VER) - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) -#else - #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ -#endif - -/* Some compilers, such as clang, may use `long long` for 64-bit - * integers, but `long long` triggers a diagnostic with - * -Wc++98-compat-pedantic which says 'long long' is incompatible with - * C++98. 
*/ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") - #if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ - _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") - #else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ -#endif - -/* Some problem as above */ -#if HEDLEY_HAS_WARNING("-Wc++11-long-long") - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ -#endif - -/* emscripten emits this whenever stdin/stdout/stderr is used in a - * macro. */ -#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ -#endif - -/* Clang uses C11 generic selections to implement some AltiVec - * functions, which triggers this diagnostic when not compiling - * in C11 mode */ -#if HEDLEY_HAS_WARNING("-Wc11-extensions") - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ -#endif - -/* Clang sometimes triggers this warning in macros in the AltiVec and - * NEON headers, or due to missing functions. */ -#if HEDLEY_HAS_WARNING("-Wvector-conversion") - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") - /* For NEON, the situation with -Wvector-conversion in clang < 10 is - * bad enough that we just disable the warning altogether. On x86, - * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ - #if \ - (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ - SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - #endif -#else - #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ -#endif - -/* Prior to 5.0, clang didn't support disabling diagnostics in - * statement exprs. As a result, some macros we use don't - * properly silence warnings. 
*/ -#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ -#endif - -/* SLEEF triggers this a *lot* in their headers */ -#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") -#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#endif - -/* GCC emits this under some circumstances when using __int128 */ -#if HEDLEY_GCC_VERSION_CHECK(4,8,0) - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -#endif - -/* MSVC doesn't like (__assume(0), code) and will warn about code being - * unreachable, but we want it there because not all compilers - * understand the unreachable macro and will complain if it is missing. - * I'm planning on adding a new macro to Hedley to handle this a bit - * more elegantly, but until then... */ -#if defined(HEDLEY_MSVC_VERSION) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) -#elif defined(__clang__) - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ -#endif - -/* This is a false positive from GCC in a few places. */ -#if HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -#else - #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif - -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#else - #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ -#endif - -/* Some native functions on E2K with instruction set < v6 are declared - * as deprecated due to inefficiency. Still they are more efficient - * than SIMDe implementation. So we're using them, and switching off - * these deprecation warnings. 
*/ -#if defined(HEDLEY_MCST_LCC_VERSION) -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") -#else -# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS -# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS -#endif - -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ - HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ - SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ - SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ - SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ - SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ - SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ - SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ - -#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ -/* :: End simde/simde-diagnostic.h :: */ - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SVML) - #define SIMDE_X86_SVML_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) - #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) - #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BITALG) - #define SIMDE_X86_AVX512BITALG_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI) - #define SIMDE_X86_AVX512VBMI_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VBMI2) - #define SIMDE_X86_AVX512VBMI2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VNNI) - #define SIMDE_X86_AVX512VNNI_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) - #define SIMDE_X86_AVX5124VNNIW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512CD) - #define SIMDE_X86_AVX512CD_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512DQ) - #define SIMDE_X86_AVX512DQ_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512VL) - #define SIMDE_X86_AVX512VL_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BW) - #define SIMDE_X86_AVX512BW_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && !defined(SIMDE_X86_AVX512FP16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512FP16) - #define SIMDE_X86_AVX512FP16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512BF16) - #define SIMDE_X86_AVX512BF16_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) - #define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX512F) - #define SIMDE_X86_AVX512F_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) - #define SIMDE_X86_AVX2_NATIVE -#endif - -#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_FMA) - #define SIMDE_X86_FMA_NATIVE - #endif -#endif -#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX2) - #define SIMDE_X86_AVX2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) - #define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AVX) - #define SIMDE_X86_AVX_NATIVE - #endif -#endif -#if 
defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_XOP) - #define SIMDE_X86_XOP_NATIVE - #endif -#endif -#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) - #define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_2) - #define SIMDE_X86_SSE4_2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) - #define SIMDE_X86_SSE4_1_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE4_1) - #define SIMDE_X86_SSE4_1_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) - #define SIMDE_X86_SSSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSSE3) - #define SIMDE_X86_SSSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) - #define SIMDE_X86_SSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE3) - #define SIMDE_X86_SSE3_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_AES_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_AES) - #define SIMDE_X86_AES_NATIVE - #endif -#endif -#if defined(SIMDE_X86_AES_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) - #define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE2) - #define SIMDE_X86_SSE2_NATIVE - #endif -#endif -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) - #define SIMDE_X86_SSE_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_SSE) - #define SIMDE_X86_SSE_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_MMX) - #define SIMDE_X86_MMX_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_GFNI) - #define SIMDE_X86_GFNI_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_PCLMUL) - #define SIMDE_X86_PCLMUL_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) - #define SIMDE_X86_VPCLMULQDQ_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86_F16C) - #define SIMDE_X86_F16C_NATIVE - #endif -#endif - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_X86) && 
[remainder of deleted vendored header simde/simde-features.h omitted: ISA detection and the corresponding intrinsic-header includes for x86 (MMX through SSE4.2, AVX, XOP, AES, GFNI, SVML), ARM NEON/SVE, WASM SIMD128/Relaxed SIMD, POWER AltiVec P5-P9 (including the vector/bool/pixel keyword workaround), z/Arch z/Vector, MIPS Loongson MMI/MSA, and LoongArch LSX/LASX; the SIMDE_NATURAL_VECTOR_SIZE selection logic; the SIMDE_ENABLE_NATIVE_ALIASES mappings for all of the above; ARM FP16/BF16 flags; and the SIMDE_IEEE754_STORAGE assumption]
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) - */ - -/* Attempt to find math functions. Functions may be in , - * , compiler built-ins/intrinsics, or platform/architecture - * specific headers. In some cases, especially those not built in to - * libm, we may need to define our own implementations. */ - -#if !defined(SIMDE_MATH_H) -#define SIMDE_MATH_H 1 - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#include -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -/* SLEEF support - * https://sleef.org/ - * - * If you include prior to including SIMDe, SIMDe will use - * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to - * including SIMDe to force the issue. - * - * Note that SLEEF does requires linking to libsleef. - * - * By default, SIMDe will use the 1 ULP functions, but if you use - * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is - * only the case for the simde_math_* functions; for code in other - * SIMDe headers which calls SLEEF directly we may use functions with - * greater error if the API we're implementing is less precise (for - * example, SVML guarantees 4 ULP, so we will generally use the 3.5 - * ULP functions from SLEEF). */ -#if !defined(SIMDE_MATH_SLEEF_DISABLE) - #if defined(__SLEEF_H__) - #define SIMDE_MATH_SLEEF_ENABLE - #endif -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ - #include - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) - #if defined(SLEEF_VERSION_MAJOR) - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) - #endif -#else - #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(__has_builtin) - #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) -#elif \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(4,4,0) - #define SIMDE_MATH_BUILTIN_LIBM(func) (1) -#else - #define SIMDE_MATH_BUILTIN_LIBM(func) (0) -#endif - -#if defined(HUGE_VAL) - /* Looks like or has already been included. */ - - /* The math.h from libc++ (yes, the C header from the C++ standard - * library) will define an isnan function, but not an isnan macro - * like the C standard requires. So we detect the header guards - * macro libc++ uses. 
*/ - #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) - #define SIMDE_MATH_HAVE_MATH_H - #elif defined(__cplusplus) - #define SIMDE_MATH_HAVE_CMATH - #endif -#elif defined(__has_include) - #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() - #define SIMDE_MATH_HAVE_CMATH - #include - #elif __has_include() - #define SIMDE_MATH_HAVE_MATH_H - #include - #elif !defined(SIMDE_MATH_NO_LIBM) - #define SIMDE_MATH_NO_LIBM - #endif -#elif !defined(SIMDE_MATH_NO_LIBM) - #if defined(__cplusplus) && (__cplusplus >= 201103L) - #define SIMDE_MATH_HAVE_CMATH - HEDLEY_DIAGNOSTIC_PUSH - #if defined(HEDLEY_MSVC_VERSION) - /* VS 14 emits this diagnostic about noexcept being used on a - * function, which we can't do anything about. */ - #pragma warning(disable:4996) - #endif - #include - HEDLEY_DIAGNOSTIC_POP - #else - #define SIMDE_MATH_HAVE_MATH_H - #include - #endif -#endif - -#if !defined(SIMDE_MATH_INFINITY) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_INFINITY (__builtin_inf()) - #elif defined(INFINITY) - #define SIMDE_MATH_INFINITY INFINITY - #endif -#endif - -#if !defined(SIMDE_INFINITYF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_inff) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_INFINITYF (__builtin_inff()) - #elif defined(INFINITYF) - #define SIMDE_MATH_INFINITYF INFINITYF - #elif defined(SIMDE_MATH_INFINITY) - #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) - #endif -#endif - -#if !defined(SIMDE_MATH_NAN) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nan) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - HEDLEY_IBM_VERSION_CHECK(13,1,0) - #define SIMDE_MATH_NAN (__builtin_nan("")) - #elif defined(NAN) - #define SIMDE_MATH_NAN NAN - #endif -#endif - -#if !defined(SIMDE_NANF) - #if \ - HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ - HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,1,0) - #define SIMDE_MATH_NANF (__builtin_nanf("")) - #elif defined(NANF) - #define SIMDE_MATH_NANF NANF - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) - #endif -#endif - -#if !defined(SIMDE_MATH_PI) - #if defined(M_PI) - #define SIMDE_MATH_PI M_PI - #else - #define SIMDE_MATH_PI 3.14159265358979323846 - #endif -#endif - -#if !defined(SIMDE_MATH_PIF) - #if defined(M_PI) - #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) - #else - #define SIMDE_MATH_PIF 3.14159265358979323846f - #endif -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180) - #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180F) - #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f -#endif - -#if !defined(SIMDE_MATH_180_OVER_PI) - #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 -#endif - -#if !defined(SIMDE_MATH_180_OVER_PIF) - #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f -#endif - -#if 
!defined(SIMDE_MATH_FLT_MIN) - #if defined(__FLT_MIN__) - #define SIMDE_MATH_FLT_MIN __FLT_MIN__ - #else - #if !defined(FLT_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MIN FLT_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_FLT_MAX) - #if defined(__FLT_MAX__) - #define SIMDE_MATH_FLT_MAX __FLT_MAX__ - #else - #if !defined(FLT_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_FLT_MAX FLT_MAX - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MIN) - #if defined(__DBL_MIN__) - #define SIMDE_MATH_DBL_MIN __DBL_MIN__ - #else - #if !defined(DBL_MIN) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MIN DBL_MIN - #endif -#endif - -#if !defined(SIMDE_MATH_DBL_MAX) - #if defined(__DBL_MAX__) - #define SIMDE_MATH_DBL_MAX __DBL_MAX__ - #else - #if !defined(DBL_MAX) - #if defined(__cplusplus) - #include - #else - #include - #endif - #endif - #define SIMDE_MATH_DBL_MAX DBL_MAX - #endif -#endif - -/*** Classification macros from C99 ***/ - -#if !defined(simde_math_isinf) - #if SIMDE_MATH_BUILTIN_LIBM(isinf) - #define simde_math_isinf(v) __builtin_isinf(v) - #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isinf(v) isinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinf(v) std::isinf(v) - #endif -#endif - -#if !defined(simde_math_isinff) - #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define simde_math_isinff(v) __builtin_isinff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isinff(v) std::isinf(v) - #elif defined(simde_math_isinf) - #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnan) - #if SIMDE_MATH_BUILTIN_LIBM(isnan) - #define simde_math_isnan(v) __builtin_isnan(v) - #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnan(v) isnan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnan(v) std::isnan(v) - #endif -#endif - -#if !defined(simde_math_isnanf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) - /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ - #define simde_math_isnanf(v) __builtin_isnanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnanf(v) std::isnan(v) - #elif defined(simde_math_isnan) - #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) - #endif -#endif - -#if !defined(simde_math_isnormal) - #if SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormal(v) __builtin_isnormal(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormal(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormal(v) std::isnormal(v) - #endif -#endif - -#if !defined(simde_math_isnormalf) - #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) - #define simde_math_isnormalf(v) __builtin_isnormalf(v) - #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) - #define simde_math_isnormalf(v) __builtin_isnormal(v) - #elif defined(isnormalf) - #define simde_math_isnormalf(v) isnormalf(v) - #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_isnormalf(v) isnormal(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_isnormalf(v) std::isnormal(v) - #elif defined(simde_math_isnormal) - #define simde_math_isnormalf(v) 
simde_math_isnormal(v) - #endif -#endif - -#if !defined(simde_math_issubnormalf) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) - #endif -#endif - -#if !defined(simde_math_issubnormal) - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) - #elif defined(fpclassify) - #define simde_math_issubnormal(v) (fpclassify(v) == FP_SUBNORMAL) - #elif defined(SIMDE_IEEE754_STORAGE) - #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) - #endif -#endif - -#if defined(FP_NAN) - #define SIMDE_MATH_FP_NAN FP_NAN -#else - #define SIMDE_MATH_FP_NAN 0 -#endif -#if defined(FP_INFINITE) - #define SIMDE_MATH_FP_INFINITE FP_INFINITE -#else - #define SIMDE_MATH_FP_INFINITE 1 -#endif -#if defined(FP_ZERO) - #define SIMDE_MATH_FP_ZERO FP_ZERO -#else - #define SIMDE_MATH_FP_ZERO 2 -#endif -#if defined(FP_SUBNORMAL) - #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL -#else - #define SIMDE_MATH_FP_SUBNORMAL 3 -#endif -#if defined(FP_NORMAL) - #define SIMDE_MATH_FP_NORMAL FP_NORMAL -#else - #define SIMDE_MATH_FP_NORMAL 4 -#endif - -static HEDLEY_INLINE -int -simde_math_fpclassifyf(float v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0f) ? SIMDE_MATH_FP_ZERO : - simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -static HEDLEY_INLINE -int -simde_math_fpclassify(double v) { - #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) - return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); - #elif defined(fpclassify) - return fpclassify(v); - #else - return - simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : - (v == 0.0) ? SIMDE_MATH_FP_ZERO : - simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : - simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : - SIMDE_MATH_FP_SUBNORMAL; - #endif -} - -#define SIMDE_MATH_FP_QNAN 0x01 -#define SIMDE_MATH_FP_PZERO 0x02 -#define SIMDE_MATH_FP_NZERO 0x04 -#define SIMDE_MATH_FP_PINF 0x08 -#define SIMDE_MATH_FP_NINF 0x10 -#define SIMDE_MATH_FP_DENORMAL 0x20 -#define SIMDE_MATH_FP_NEGATIVE 0x40 -#define SIMDE_MATH_FP_SNAN 0x80 - -static HEDLEY_INLINE -uint8_t -simde_math_fpclassf(float v, const int imm8) { - union { - float f; - uint32_t u; - } fu; - fu.f = v; - uint32_t bits = fu.u; - uint8_t NegNum = (bits >> 31) & 1; - uint32_t const ExpMask = 0x3F800000; // [30:23] - uint32_t const MantMask = 0x007FFFFF; // [22:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 22) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -static HEDLEY_INLINE -uint8_t -simde_math_fpclass(double v, const int imm8) { - union { - double d; - uint64_t u; - } du; - du.d = v; - uint64_t bits = du.u; - uint8_t NegNum = (bits >> 63) & 1; - uint64_t const ExpMask = 0x3FF0000000000000; // [62:52] - uint64_t const MantMask = 0x000FFFFFFFFFFFFF; // [51:0] - uint8_t ExpAllOnes = ((bits & ExpMask) == ExpMask); - uint8_t ExpAllZeros = ((bits & ExpMask) == 0); - uint8_t MantAllZeros = ((bits & MantMask) == 0); - uint8_t ZeroNumber = ExpAllZeros & MantAllZeros; - uint8_t SignalingBit = (bits >> 51) & 1; - - uint8_t result = 0; - uint8_t qNaN_res = ExpAllOnes & (!MantAllZeros) & SignalingBit; - uint8_t Pzero_res = (!NegNum) & ExpAllZeros & MantAllZeros; - uint8_t Nzero_res = NegNum & ExpAllZeros & MantAllZeros; - uint8_t Pinf_res = (!NegNum) & ExpAllOnes & MantAllZeros; - uint8_t Ninf_res = NegNum & ExpAllOnes & MantAllZeros; - uint8_t Denorm_res = ExpAllZeros & (!MantAllZeros); - uint8_t FinNeg_res = NegNum & (!ExpAllOnes) & (!ZeroNumber); - uint8_t sNaN_res = ExpAllOnes & (!MantAllZeros) & (!SignalingBit); - result = (((imm8 >> 0) & qNaN_res) | \ - ((imm8 >> 1) & Pzero_res) | \ - ((imm8 >> 2) & Nzero_res) | \ - ((imm8 >> 3) & Pinf_res) | \ - ((imm8 >> 4) & Ninf_res) | \ - ((imm8 >> 5) & Denorm_res) | \ - ((imm8 >> 6) & FinNeg_res) | \ - ((imm8 >> 7) & sNaN_res)); - return result; -} - -/*** Manipulation functions ***/ - -#if !defined(simde_math_nextafter) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafter(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafter(x, y) 
nextafter(x, y) - #endif -#endif - -#if !defined(simde_math_nextafterf) - #if \ - (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nextafterf(x, y) std::nextafter(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nextafterf(x, y) nextafterf(x, y) - #endif -#endif - -/*** Functions from C99 ***/ - -#if !defined(simde_math_abs) - #if SIMDE_MATH_BUILTIN_LIBM(abs) - #define simde_math_abs(v) __builtin_abs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_abs(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_abs(v) abs(v) - #endif -#endif - -#if !defined(simde_math_labs) - #if SIMDE_MATH_BUILTIN_LIBM(labs) - #define simde_math_labs(v) __builtin_labs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_labs(v) std::labs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_labs(v) labs(v) - #endif -#endif - -#if !defined(simde_math_llabs) - #if SIMDE_MATH_BUILTIN_LIBM(llabs) - #define simde_math_llabs(v) __builtin_llabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_llabs(v) std::llabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_llabs(v) llabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::abs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_acos) - #if SIMDE_MATH_BUILTIN_LIBM(acos) - #define simde_math_acos(v) __builtin_acos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acos(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acos(v) acos(v) - #endif -#endif - -#if !defined(simde_math_acosf) - #if SIMDE_MATH_BUILTIN_LIBM(acosf) - #define simde_math_acosf(v) __builtin_acosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosf(v) std::acos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosf(v) acosf(v) - #endif -#endif - -#if !defined(simde_math_acosh) - #if SIMDE_MATH_BUILTIN_LIBM(acosh) - #define simde_math_acosh(v) __builtin_acosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acosh(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acosh(v) acosh(v) - #endif -#endif - -#if !defined(simde_math_acoshf) - #if SIMDE_MATH_BUILTIN_LIBM(acoshf) - #define simde_math_acoshf(v) __builtin_acoshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_acoshf(v) std::acosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_acoshf(v) acoshf(v) - #endif -#endif - -#if !defined(simde_math_asin) - #if SIMDE_MATH_BUILTIN_LIBM(asin) - #define simde_math_asin(v) __builtin_asin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asin(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asin(v) asin(v) - #endif -#endif - -#if !defined(simde_math_asinf) - #if SIMDE_MATH_BUILTIN_LIBM(asinf) - #define simde_math_asinf(v) __builtin_asinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinf(v) std::asin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinf(v) asinf(v) - #endif -#endif - -#if 
!defined(simde_math_asinh) - #if SIMDE_MATH_BUILTIN_LIBM(asinh) - #define simde_math_asinh(v) __builtin_asinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinh(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinh(v) asinh(v) - #endif -#endif - -#if !defined(simde_math_asinhf) - #if SIMDE_MATH_BUILTIN_LIBM(asinhf) - #define simde_math_asinhf(v) __builtin_asinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_asinhf(v) std::asinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_asinhf(v) asinhf(v) - #endif -#endif - -#if !defined(simde_math_atan) - #if SIMDE_MATH_BUILTIN_LIBM(atan) - #define simde_math_atan(v) __builtin_atan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan(v) atan(v) - #endif -#endif - -#if !defined(simde_math_atan2) - #if SIMDE_MATH_BUILTIN_LIBM(atan2) - #define simde_math_atan2(y, x) __builtin_atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2(y, x) atan2(y, x) - #endif -#endif - -#if !defined(simde_math_atan2f) - #if SIMDE_MATH_BUILTIN_LIBM(atan2f) - #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atan2f(y, x) std::atan2(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atan2f(y, x) atan2f(y, x) - #endif -#endif - -#if !defined(simde_math_atanf) - #if SIMDE_MATH_BUILTIN_LIBM(atanf) - #define simde_math_atanf(v) __builtin_atanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanf(v) std::atan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanf(v) atanf(v) - #endif -#endif - -#if !defined(simde_math_atanh) - #if SIMDE_MATH_BUILTIN_LIBM(atanh) - #define simde_math_atanh(v) __builtin_atanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanh(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanh(v) atanh(v) - #endif -#endif - -#if !defined(simde_math_atanhf) - #if SIMDE_MATH_BUILTIN_LIBM(atanhf) - #define simde_math_atanhf(v) __builtin_atanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_atanhf(v) std::atanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_atanhf(v) atanhf(v) - #endif -#endif - -#if !defined(simde_math_cbrt) - #if SIMDE_MATH_BUILTIN_LIBM(cbrt) - #define simde_math_cbrt(v) __builtin_cbrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrt(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrt(v) cbrt(v) - #endif -#endif - -#if !defined(simde_math_cbrtf) - #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) - #define simde_math_cbrtf(v) __builtin_cbrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cbrtf(v) std::cbrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cbrtf(v) cbrtf(v) - #endif -#endif - -#if !defined(simde_math_ceil) - #if SIMDE_MATH_BUILTIN_LIBM(ceil) - #define simde_math_ceil(v) __builtin_ceil(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceil(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_ceil(v) ceil(v) - #endif -#endif - -#if !defined(simde_math_ceilf) - #if SIMDE_MATH_BUILTIN_LIBM(ceilf) - #define simde_math_ceilf(v) __builtin_ceilf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_ceilf(v) std::ceil(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) 
- #define simde_math_ceilf(v) ceilf(v) - #endif -#endif - -#if !defined(simde_math_copysign) - #if SIMDE_MATH_BUILTIN_LIBM(copysign) - #define simde_math_copysign(x, y) __builtin_copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysign(x, y) std::copysign(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysign(x, y) copysign(x, y) - #endif -#endif - -#if !defined(simde_math_copysignf) - #if SIMDE_MATH_BUILTIN_LIBM(copysignf) - #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_copysignf(x, y) std::copysignf(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_copysignf(x, y) copysignf(x, y) - #endif -#endif - -#if !defined(simde_math_signbit) - #if SIMDE_MATH_BUILTIN_LIBM(signbit) - #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) - #define simde_math_signbit(x) __builtin_signbit(x) - #else - #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) - #endif - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_signbit(x) std::signbit(x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_signbit(x) signbit(x) - #endif -#endif - -#if !defined(simde_math_cos) - #if SIMDE_MATH_BUILTIN_LIBM(cos) - #define simde_math_cos(v) __builtin_cos(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cos(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cos(v) cos(v) - #endif -#endif - -#if !defined(simde_math_cosf) - #if defined(SIMDE_MATH_SLEEF_ENABLE) - #if SIMDE_ACCURACY_PREFERENCE < 1 - #define simde_math_cosf(v) Sleef_cosf_u35(v) - #else - #define simde_math_cosf(v) Sleef_cosf_u10(v) - #endif - #elif SIMDE_MATH_BUILTIN_LIBM(cosf) - #define simde_math_cosf(v) __builtin_cosf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosf(v) std::cos(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosf(v) cosf(v) - #endif -#endif - -#if !defined(simde_math_cosh) - #if SIMDE_MATH_BUILTIN_LIBM(cosh) - #define simde_math_cosh(v) __builtin_cosh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_cosh(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_cosh(v) cosh(v) - #endif -#endif - -#if !defined(simde_math_coshf) - #if SIMDE_MATH_BUILTIN_LIBM(coshf) - #define simde_math_coshf(v) __builtin_coshf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_coshf(v) std::cosh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_coshf(v) coshf(v) - #endif -#endif - -#if !defined(simde_math_erf) - #if SIMDE_MATH_BUILTIN_LIBM(erf) - #define simde_math_erf(v) __builtin_erf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erf(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erf(v) erf(v) - #endif -#endif - -#if !defined(simde_math_erff) - #if SIMDE_MATH_BUILTIN_LIBM(erff) - #define simde_math_erff(v) __builtin_erff(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erff(v) std::erf(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erff(v) erff(v) - #endif -#endif - -#if !defined(simde_math_erfc) - #if SIMDE_MATH_BUILTIN_LIBM(erfc) - #define simde_math_erfc(v) __builtin_erfc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfc(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfc(v) erfc(v) - #endif -#endif - -#if !defined(simde_math_erfcf) - #if SIMDE_MATH_BUILTIN_LIBM(erfcf) - #define simde_math_erfcf(v) 
__builtin_erfcf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_erfcf(v) std::erfc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_erfcf(v) erfcf(v) - #endif -#endif - -#if !defined(simde_math_exp) - #if SIMDE_MATH_BUILTIN_LIBM(exp) - #define simde_math_exp(v) __builtin_exp(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp(v) exp(v) - #endif -#endif - -#if !defined(simde_math_expf) - #if SIMDE_MATH_BUILTIN_LIBM(expf) - #define simde_math_expf(v) __builtin_expf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expf(v) std::exp(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expf(v) expf(v) - #endif -#endif - -#if !defined(simde_math_expm1) - #if SIMDE_MATH_BUILTIN_LIBM(expm1) - #define simde_math_expm1(v) __builtin_expm1(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1(v) expm1(v) - #endif -#endif - -#if !defined(simde_math_expm1f) - #if SIMDE_MATH_BUILTIN_LIBM(expm1f) - #define simde_math_expm1f(v) __builtin_expm1f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_expm1f(v) std::expm1(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_expm1f(v) expm1f(v) - #endif -#endif - -#if !defined(simde_math_exp2) - #if SIMDE_MATH_BUILTIN_LIBM(exp2) - #define simde_math_exp2(v) __builtin_exp2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2(v) exp2(v) - #endif -#endif - -#if !defined(simde_math_exp2f) - #if SIMDE_MATH_BUILTIN_LIBM(exp2f) - #define simde_math_exp2f(v) __builtin_exp2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_exp2f(v) std::exp2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_exp2f(v) exp2f(v) - #endif -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10(v) __builtin_exp10(v) -#else -# define simde_math_exp10(v) pow(10.0, (v)) -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - # define simde_math_exp10f(v) __builtin_exp10f(v) -#else -# define simde_math_exp10f(v) powf(10.0f, (v)) -#endif - -#if !defined(simde_math_fabs) - #if SIMDE_MATH_BUILTIN_LIBM(fabs) - #define simde_math_fabs(v) __builtin_fabs(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabs(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabs(v) fabs(v) - #endif -#endif - -#if !defined(simde_math_fabsf) - #if SIMDE_MATH_BUILTIN_LIBM(fabsf) - #define simde_math_fabsf(v) __builtin_fabsf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fabsf(v) std::fabs(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fabsf(v) fabsf(v) - #endif -#endif - -#if !defined(simde_math_floor) - #if SIMDE_MATH_BUILTIN_LIBM(floor) - #define simde_math_floor(v) __builtin_floor(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floor(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floor(v) floor(v) - #endif -#endif - -#if !defined(simde_math_floorf) - #if SIMDE_MATH_BUILTIN_LIBM(floorf) - #define simde_math_floorf(v) __builtin_floorf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_floorf(v) std::floor(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_floorf(v) floorf(v) - #endif -#endif - -#if 
!defined(simde_math_fma) - #if SIMDE_MATH_BUILTIN_LIBM(fma) - #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fma(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fma(x, y, z) fma(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmaf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaf) - #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaf(x, y, z) std::fma(x, y, z) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaf(x, y, z) fmaf(x, y, z) - #endif -#endif - -#if !defined(simde_math_fmax) - #if SIMDE_MATH_BUILTIN_LIBM(fmax) - #define simde_math_fmax(x, y) __builtin_fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmax(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmax(x, y) fmax(x, y) - #endif -#endif - -#if !defined(simde_math_fmaxf) - #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) - #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_fmaxf(x, y) std::fmax(x, y) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_fmaxf(x, y) fmaxf(x, y) - #endif -#endif - -#if !defined(simde_math_hypot) - #if SIMDE_MATH_BUILTIN_LIBM(hypot) - #define simde_math_hypot(y, x) __builtin_hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypot(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypot(y, x) hypot(y, x) - #endif -#endif - -#if !defined(simde_math_hypotf) - #if SIMDE_MATH_BUILTIN_LIBM(hypotf) - #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_hypotf(y, x) std::hypot(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_hypotf(y, x) hypotf(y, x) - #endif -#endif - -#if !defined(simde_math_log) - #if SIMDE_MATH_BUILTIN_LIBM(log) - #define simde_math_log(v) __builtin_log(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log(v) log(v) - #endif -#endif - -#if !defined(simde_math_logf) - #if SIMDE_MATH_BUILTIN_LIBM(logf) - #define simde_math_logf(v) __builtin_logf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logf(v) std::log(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logf(v) logf(v) - #endif -#endif - -#if !defined(simde_math_logb) - #if SIMDE_MATH_BUILTIN_LIBM(logb) - #define simde_math_logb(v) __builtin_logb(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logb(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logb(v) logb(v) - #endif -#endif - -#if !defined(simde_math_logbf) - #if SIMDE_MATH_BUILTIN_LIBM(logbf) - #define simde_math_logbf(v) __builtin_logbf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_logbf(v) std::logb(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_logbf(v) logbf(v) - #endif -#endif - -#if !defined(simde_math_log1p) - #if SIMDE_MATH_BUILTIN_LIBM(log1p) - #define simde_math_log1p(v) __builtin_log1p(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log1p(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1p(v) log1p(v) - #endif -#endif - -#if !defined(simde_math_log1pf) - #if SIMDE_MATH_BUILTIN_LIBM(log1pf) - #define simde_math_log1pf(v) __builtin_log1pf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define 
simde_math_log1pf(v) std::log1p(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log1pf(v) log1pf(v) - #endif -#endif - -#if !defined(simde_math_log2) - #if SIMDE_MATH_BUILTIN_LIBM(log2) - #define simde_math_log2(v) __builtin_log2(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2(v) log2(v) - #endif -#endif - -#if !defined(simde_math_log2f) - #if SIMDE_MATH_BUILTIN_LIBM(log2f) - #define simde_math_log2f(v) __builtin_log2f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log2f(v) std::log2(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log2f(v) log2f(v) - #endif -#endif - -#if !defined(simde_math_log10) - #if SIMDE_MATH_BUILTIN_LIBM(log10) - #define simde_math_log10(v) __builtin_log10(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10(v) log10(v) - #endif -#endif - -#if !defined(simde_math_log10f) - #if SIMDE_MATH_BUILTIN_LIBM(log10f) - #define simde_math_log10f(v) __builtin_log10f(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_log10f(v) std::log10(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_log10f(v) log10f(v) - #endif -#endif - -#if !defined(simde_math_modf) - #if SIMDE_MATH_BUILTIN_LIBM(modf) - #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modf(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modf(x, iptr) modf(x, iptr) - #endif -#endif - -#if !defined(simde_math_modff) - #if SIMDE_MATH_BUILTIN_LIBM(modff) - #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_modff(x, iptr) std::modf(x, iptr) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_modff(x, iptr) modff(x, iptr) - #endif -#endif - -#if !defined(simde_math_nearbyint) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) - #define simde_math_nearbyint(v) __builtin_nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyint(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyint(v) nearbyint(v) - #endif -#endif - -#if !defined(simde_math_nearbyintf) - #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) - #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_nearbyintf(v) std::nearbyint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_nearbyintf(v) nearbyintf(v) - #endif -#endif - -#if !defined(simde_math_pow) - #if SIMDE_MATH_BUILTIN_LIBM(pow) - #define simde_math_pow(y, x) __builtin_pow(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_pow(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_pow(y, x) pow(y, x) - #endif -#endif - -#if !defined(simde_math_powf) - #if SIMDE_MATH_BUILTIN_LIBM(powf) - #define simde_math_powf(y, x) __builtin_powf(y, x) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_powf(y, x) std::pow(y, x) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_powf(y, x) powf(y, x) - #endif -#endif - -#if !defined(simde_math_rint) - #if SIMDE_MATH_BUILTIN_LIBM(rint) - #define simde_math_rint(v) __builtin_rint(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rint(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rint(v) rint(v) - #endif 
-#endif - -#if !defined(simde_math_rintf) - #if SIMDE_MATH_BUILTIN_LIBM(rintf) - #define simde_math_rintf(v) __builtin_rintf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_rintf(v) std::rint(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_rintf(v) rintf(v) - #endif -#endif - -#if !defined(simde_math_round) - #if SIMDE_MATH_BUILTIN_LIBM(round) - #define simde_math_round(v) __builtin_round(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_round(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_round(v) round(v) - #endif -#endif - -#if !defined(simde_math_roundf) - #if SIMDE_MATH_BUILTIN_LIBM(roundf) - #define simde_math_roundf(v) __builtin_roundf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_roundf(v) std::round(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_roundf(v) roundf(v) - #endif -#endif - -#if !defined(simde_math_roundeven) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundeven(v) __builtin_roundeven(v) - #elif defined(simde_math_round) && defined(simde_math_fabs) - static HEDLEY_INLINE - double - simde_math_roundeven(double v) { - double rounded = simde_math_round(v); - double diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundeven simde_math_roundeven - #endif -#endif - -#if !defined(simde_math_roundevenf) - #if \ - (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ - HEDLEY_GCC_VERSION_CHECK(10,0,0) - #define simde_math_roundevenf(v) __builtin_roundevenf(v) - #elif defined(simde_math_roundf) && defined(simde_math_fabsf) - static HEDLEY_INLINE - float - simde_math_roundevenf(float v) { - float rounded = simde_math_roundf(v); - float diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; - } - #define simde_math_roundevenf simde_math_roundevenf - #endif -#endif - -#if !defined(simde_math_sin) - #if SIMDE_MATH_BUILTIN_LIBM(sin) - #define simde_math_sin(v) __builtin_sin(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sin(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sin(v) sin(v) - #endif -#endif - -#if !defined(simde_math_sinf) - #if SIMDE_MATH_BUILTIN_LIBM(sinf) - #define simde_math_sinf(v) __builtin_sinf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinf(v) std::sin(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinf(v) sinf(v) - #endif -#endif - -#if !defined(simde_math_sinh) - #if SIMDE_MATH_BUILTIN_LIBM(sinh) - #define simde_math_sinh(v) __builtin_sinh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinh(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinh(v) sinh(v) - #endif -#endif - -#if !defined(simde_math_sinhf) - #if SIMDE_MATH_BUILTIN_LIBM(sinhf) - #define simde_math_sinhf(v) __builtin_sinhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sinhf(v) std::sinh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sinhf(v) sinhf(v) - #endif -#endif - -#if !defined(simde_math_sqrt) - #if SIMDE_MATH_BUILTIN_LIBM(sqrt) - #define simde_math_sqrt(v) __builtin_sqrt(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrt(v) 
std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrt(v) sqrt(v) - #endif -#endif - -#if !defined(simde_math_sqrtf) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) - #define simde_math_sqrtf(v) __builtin_sqrtf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtf(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtf(v) sqrtf(v) - #endif -#endif - -#if !defined(simde_math_sqrtl) - #if SIMDE_MATH_BUILTIN_LIBM(sqrtl) - #define simde_math_sqrtl(v) __builtin_sqrtl(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_sqrtl(v) std::sqrt(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_sqrtl(v) sqrtl(v) - #endif -#endif - -#if !defined(simde_math_tan) - #if SIMDE_MATH_BUILTIN_LIBM(tan) - #define simde_math_tan(v) __builtin_tan(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tan(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tan(v) tan(v) - #endif -#endif - -#if !defined(simde_math_tanf) - #if SIMDE_MATH_BUILTIN_LIBM(tanf) - #define simde_math_tanf(v) __builtin_tanf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanf(v) std::tan(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanf(v) tanf(v) - #endif -#endif - -#if !defined(simde_math_tanh) - #if SIMDE_MATH_BUILTIN_LIBM(tanh) - #define simde_math_tanh(v) __builtin_tanh(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanh(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanh(v) tanh(v) - #endif -#endif - -#if !defined(simde_math_tanhf) - #if SIMDE_MATH_BUILTIN_LIBM(tanhf) - #define simde_math_tanhf(v) __builtin_tanhf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_tanhf(v) std::tanh(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_tanhf(v) tanhf(v) - #endif -#endif - -#if !defined(simde_math_trunc) - #if SIMDE_MATH_BUILTIN_LIBM(trunc) - #define simde_math_trunc(v) __builtin_trunc(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_trunc(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_trunc(v) trunc(v) - #endif -#endif - -#if !defined(simde_math_truncf) - #if SIMDE_MATH_BUILTIN_LIBM(truncf) - #define simde_math_truncf(v) __builtin_truncf(v) - #elif defined(SIMDE_MATH_HAVE_CMATH) - #define simde_math_truncf(v) std::trunc(v) - #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_truncf(v) truncf(v) - #endif -#endif - -/*** Comparison macros (which don't raise invalid errors) ***/ - -#if defined(isunordered) - #define simde_math_isunordered(x, y) isunordered(x, y) -#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) - #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) -#else - static HEDLEY_INLINE - int simde_math_isunordered(double x, double y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunordered simde_math_isunordered - - static HEDLEY_INLINE - int simde_math_isunorderedf(float x, float y) { - return (x != y) && (x != x || y != y); - } - #define simde_math_isunorderedf simde_math_isunorderedf -#endif -#if !defined(simde_math_isunorderedf) - #define simde_math_isunorderedf simde_math_isunordered -#endif - -/*** Additional functions not in libm ***/ - -#if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) - static HEDLEY_INLINE - double - simde_math_cdfnorm(double x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const double a1 = 0.254829592; - static const double a2 = -0.284496736; 
- static const double a3 = 1.421413741; - static const double a4 = -1.453152027; - static const double a5 = 1.061405429; - static const double p = 0.3275911; - - const int sign = x < 0; - x = simde_math_fabs(x) / simde_math_sqrt(2.0); - - /* A&S formula 7.1.26 */ - double t = 1.0 / (1.0 + p * x); - double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); - - return 0.5 * (1.0 + (sign ? -y : y)); - } - #define simde_math_cdfnorm simde_math_cdfnorm -#endif - -#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) - static HEDLEY_INLINE - float - simde_math_cdfnormf(float x) { - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const float a1 = 0.254829592f; - static const float a2 = -0.284496736f; - static const float a3 = 1.421413741f; - static const float a4 = -1.453152027f; - static const float a5 = 1.061405429f; - static const float p = 0.3275911f; - - const int sign = x < 0; - x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); - - /* A&S formula 7.1.26 */ - float t = 1.0f / (1.0f + p * x); - float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); - - return 0.5f * (1.0f + (sign ? -y : y)); - } - #define simde_math_cdfnormf simde_math_cdfnormf -#endif - -#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) - /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ - static HEDLEY_INLINE - double - simde_math_cdfnorminv(double p) { - static const double a[6] = { - -3.969683028665376e+01, - 2.209460984245205e+02, - -2.759285104469687e+02, - 1.383577518672690e+02, - -3.066479806614716e+01, - 2.506628277459239e+00 - }; - - static const double b[5] = { - -5.447609879822406e+01, - 1.615858368580409e+02, - -1.556989798598866e+02, - 6.680131188771972e+01, - -1.328068155288572e+01 - }; - - static const double c[6] = { - -7.784894002430293e-03, - -3.223964580411365e-01, - -2.400758277161838e+00, - -2.549732539343734e+00, - 4.374664141464968e+00, - 2.938163982698783e+00 - }; - - static const double d[4] = { - 7.784695709041462e-03, - 3.224671290700398e-01, - 2.445134137142996e+00, - 3.754408661907416e+00 - }; - - static const double low = 0.02425; - static const double high = 0.97575; - double q, r; - - if (p < 0 || p > 1) { - return 0.0; - } else if (p == 0) { - return -SIMDE_MATH_INFINITY; - } else if (p == 1) { - return SIMDE_MATH_INFINITY; - } else if (p < low) { - q = simde_math_sqrt(-2.0 * simde_math_log(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } -} -#define simde_math_cdfnorminv simde_math_cdfnorminv -#endif - -#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_cdfnorminvf(float p) { - static const float a[6] = { - -3.969683028665376e+01f, - 2.209460984245205e+02f, - -2.759285104469687e+02f, - 1.383577518672690e+02f, - -3.066479806614716e+01f, - 
2.506628277459239e+00f - }; - static const float b[5] = { - -5.447609879822406e+01f, - 1.615858368580409e+02f, - -1.556989798598866e+02f, - 6.680131188771972e+01f, - -1.328068155288572e+01f - }; - static const float c[6] = { - -7.784894002430293e-03f, - -3.223964580411365e-01f, - -2.400758277161838e+00f, - -2.549732539343734e+00f, - 4.374664141464968e+00f, - 2.938163982698783e+00f - }; - static const float d[4] = { - 7.784695709041462e-03f, - 3.224671290700398e-01f, - 2.445134137142996e+00f, - 3.754408661907416e+00f - }; - static const float low = 0.02425f; - static const float high = 0.97575f; - float q, r; - - if (p < 0 || p > 1) { - return 0.0f; - } else if (p == 0) { - return -SIMDE_MATH_INFINITYF; - } else if (p == 1) { - return SIMDE_MATH_INFINITYF; - } else if (p < low) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); - return - (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); - return - -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5f; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * - q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); - } - } - #define simde_math_cdfnorminvf simde_math_cdfnorminvf -#endif - -#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfinv(double x) { - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c - * - * The original answer on SO uses a constant of 0.147, but in my - * testing 0.14829094707965850830078125 gives a lower average absolute error - * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). - * That said, if your goal is to minimize the *maximum* absolute - * error, 0.15449436008930206298828125 provides significantly better - * results; 0.0009250640869140625000000000 vs ~ 0.005. 
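// A minimal standalone sketch (not part of SIMDe or this diff) that spot-checks
// the inverse-error-function approximation discussed above: the same closed form
// with the widely quoted 0.147 constant, round-tripped against std::erf. The
// error bounds mentioned in the comment are well below 1e-2, so a loose
// tolerance is used.
#include <cassert>
#include <cmath>

static double erfinv_approx(double x) {
  const double a   = 0.147;                            // constant discussed above
  const double pi  = 3.14159265358979323846;
  const double lnx = std::log((1.0 - x) * (1.0 + x));  // log(1 - x^2)
  const double t1  = 2.0 / (pi * a) + 0.5 * lnx;
  const double t2  = lnx / a;
  return std::copysign(std::sqrt(std::sqrt(t1 * t1 - t2) - t1), x);
}

int main() {
  for (double x = -0.9; x <= 0.9; x += 0.1)
    assert(std::fabs(erfinv_approx(std::erf(x)) - x) < 1e-2);
  return 0;
}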
*/ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); - } - #define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfinvf(float x) { - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); - } - #define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) - static HEDLEY_INLINE - double - simde_math_erfcinv(double x) { - if(x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - static const double p[6] = { - 0.1550470003116, - 1.382719649631, - 0.690969348887, - -1.128081391617, - 0.680544246825, - -0.16444156791 - }; - static const double q[3] = { - 0.155024849822, - 1.385228141995, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - static const double p[4] = { - 0.00980456202915, - 0.363667889171, - 0.97302949837, - -0.5374947401 - }; - static const double q[3] = { - 0.00980451277802, - 0.363699971544, - 1.000000000000 - }; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } - } - - #define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) - static HEDLEY_INLINE - float - simde_math_erfcinvf(float x) { - if(x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = { - 0.1550470003116f, - 1.382719649631f, - 0.690969348887f, - -1.128081391617f, - 0.680544246825f - -0.164441567910f - }; - static const float q[3] = { - 0.155024849822f, - 1.385228141995f, - 1.000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = { - 0.00980456202915f, - 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f - }; - static const float q[3] = { - 0.00980451277802f, - 0.36369997154400f, - 1.00000000000000f - }; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; - } - } - - #define simde_math_erfcinvf simde_math_erfcinvf -#endif - -static HEDLEY_INLINE -double -simde_math_rad2deg(double radians) { - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE -float -simde_math_rad2degf(float radians) { - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE -double -simde_math_deg2rad(double degrees) { - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE -float -simde_math_deg2radf(float degrees) { - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE -int8_t -simde_math_adds_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); - #else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_adds_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_adds_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_adds_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_adds_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); - #else - uint8_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_adds_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); - #else - uint16_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_adds_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); - #else - uint32_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_adds_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); - #else - uint64_t r = a + b; - r |= -(r < a); - return r; - #endif -} - -static HEDLEY_INLINE -int8_t -simde_math_subs_i8(int8_t a, int8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); - #else - uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); - #endif -} - -static HEDLEY_INLINE -int16_t -simde_math_subs_i16(int16_t a, int16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); - #else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); - #endif -} - -static HEDLEY_INLINE -int32_t -simde_math_subs_i32(int32_t a, int32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); - #else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); - #endif -} - -static HEDLEY_INLINE -int64_t -simde_math_subs_i64(int64_t a, int64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); - #else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); - #endif -} - -static HEDLEY_INLINE -uint8_t -simde_math_subs_u8(uint8_t a, uint8_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); - #else - uint8_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint16_t -simde_math_subs_u16(uint16_t a, uint16_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); - #else - uint16_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint32_t -simde_math_subs_u32(uint32_t a, uint32_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); - #else - uint32_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -static HEDLEY_INLINE -uint64_t -simde_math_subs_u64(uint64_t a, uint64_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); - #else - uint64_t res = a - b; - res &= -(res <= a); - return res; - #endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ -/* :: End simde/simde-math.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-constify.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
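// A minimal sketch (not part of SIMDe or this diff) of the branchless unsigned
// saturation idiom used by the portable adds_u*/subs_u* fallbacks above: the
// wrap-around check yields 0 or 1, and negation turns that into an all-zeros
// or all-ones mask.
#include <cassert>
#include <cstdint>

static uint8_t adds_u8(uint8_t a, uint8_t b) {
  uint8_t r = static_cast<uint8_t>(a + b);
  r |= static_cast<uint8_t>(-(r < a));    // clamp to 0xFF when the sum wrapped
  return r;
}

static uint8_t subs_u8(uint8_t a, uint8_t b) {
  uint8_t r = static_cast<uint8_t>(a - b);
  r &= static_cast<uint8_t>(-(r <= a));   // clamp to 0 when the difference wrapped
  return r;
}

int main() {
  assert(adds_u8(200, 100) == 255);   // 300 saturates to the maximum
  assert(adds_u8(10, 20) == 30);
  assert(subs_u8(10, 20) == 0);       // negative result saturates to zero
  assert(subs_u8(20, 10) == 10);
  return 0;
}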
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: result = func_name(__VA_ARGS__, 0); break; \ - case 1: result = func_name(__VA_ARGS__, 1); break; \ - case 2: result = func_name(__VA_ARGS__, 2); break; \ - case 3: result = func_name(__VA_ARGS__, 3); break; \ - case 4: result = func_name(__VA_ARGS__, 4); break; \ - case 5: result = func_name(__VA_ARGS__, 5); break; \ - case 6: result = func_name(__VA_ARGS__, 6); break; \ - case 7: result = func_name(__VA_ARGS__, 7); break; \ - case 8: result = func_name(__VA_ARGS__, 8); break; \ - case 9: result = func_name(__VA_ARGS__, 9); break; \ - case 10: result = func_name(__VA_ARGS__, 10); break; \ - case 11: result = func_name(__VA_ARGS__, 11); break; \ - case 12: result = func_name(__VA_ARGS__, 12); break; \ - case 13: result = func_name(__VA_ARGS__, 13); break; \ - case 14: result = func_name(__VA_ARGS__, 14); break; \ - case 15: result = func_name(__VA_ARGS__, 15); break; \ - case 16: result = func_name(__VA_ARGS__, 16); break; \ - case 17: result = func_name(__VA_ARGS__, 17); break; \ - case 18: result = func_name(__VA_ARGS__, 18); break; \ - case 19: result = func_name(__VA_ARGS__, 19); break; \ - case 20: result = func_name(__VA_ARGS__, 20); break; \ - case 21: result = func_name(__VA_ARGS__, 21); break; \ - case 22: result = func_name(__VA_ARGS__, 22); break; \ - case 23: result = func_name(__VA_ARGS__, 23); break; \ - case 24: result = func_name(__VA_ARGS__, 24); break; \ - case 25: result = func_name(__VA_ARGS__, 25); break; \ - case 26: result = func_name(__VA_ARGS__, 26); break; \ - case 27: result = func_name(__VA_ARGS__, 27); break; \ - case 28: result = func_name(__VA_ARGS__, 28); break; \ - case 29: result = func_name(__VA_ARGS__, 29); break; \ - case 30: result = func_name(__VA_ARGS__, 30); break; \ - case 31: result = func_name(__VA_ARGS__, 31); break; \ - case 32: result = func_name(__VA_ARGS__, 32); break; \ - case 33: result = func_name(__VA_ARGS__, 33); break; \ - case 34: result = func_name(__VA_ARGS__, 34); break; \ - case 35: result = func_name(__VA_ARGS__, 35); break; \ - case 36: result = func_name(__VA_ARGS__, 36); break; \ - case 37: result = func_name(__VA_ARGS__, 37); break; \ - case 38: result = func_name(__VA_ARGS__, 38); break; \ - case 39: result = func_name(__VA_ARGS__, 39); break; \ - case 40: result = func_name(__VA_ARGS__, 40); break; \ - case 41: result = func_name(__VA_ARGS__, 41); break; \ - case 42: result = func_name(__VA_ARGS__, 42); break; \ - case 43: result = func_name(__VA_ARGS__, 43); break; \ - case 44: result = func_name(__VA_ARGS__, 44); break; \ - case 45: result = func_name(__VA_ARGS__, 45); break; \ - case 46: result = func_name(__VA_ARGS__, 46); break; \ - case 47: result = func_name(__VA_ARGS__, 47); break; \ - case 48: result = func_name(__VA_ARGS__, 48); break; \ - case 49: result = func_name(__VA_ARGS__, 49); break; \ - case 50: result = func_name(__VA_ARGS__, 50); break; \ - case 51: result = func_name(__VA_ARGS__, 51); break; \ - case 52: result = func_name(__VA_ARGS__, 52); break; \ - case 53: result = func_name(__VA_ARGS__, 53); break; \ - case 54: result = func_name(__VA_ARGS__, 54); break; \ - case 55: result = func_name(__VA_ARGS__, 55); break; \ - case 56: result = func_name(__VA_ARGS__, 56); break; \ - case 57: result = func_name(__VA_ARGS__, 57); break; \ - case 58: result = func_name(__VA_ARGS__, 58); break; \ - case 59: result = func_name(__VA_ARGS__, 59); break; \ - case 60: result = func_name(__VA_ARGS__, 60); break; \ - case 61: result = func_name(__VA_ARGS__, 61); break; \ - case 62: 
result = func_name(__VA_ARGS__, 62); break; \ - case 63: result = func_name(__VA_ARGS__, 63); break; \ - default: result = default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - default: default_case; break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch(imm) { \ - case 0: func_name(__VA_ARGS__, 0); break; \ - case 1: func_name(__VA_ARGS__, 1); break; \ - case 2: func_name(__VA_ARGS__, 2); break; \ - case 3: func_name(__VA_ARGS__, 3); break; \ - case 4: func_name(__VA_ARGS__, 4); break; \ - case 5: func_name(__VA_ARGS__, 5); break; \ - case 6: func_name(__VA_ARGS__, 6); break; \ - case 7: func_name(__VA_ARGS__, 7); break; \ - case 8: func_name(__VA_ARGS__, 8); break; \ - case 9: func_name(__VA_ARGS__, 9); break; \ - case 10: func_name(__VA_ARGS__, 10); break; \ - case 11: func_name(__VA_ARGS__, 11); break; \ - case 12: func_name(__VA_ARGS__, 12); break; \ - case 13: func_name(__VA_ARGS__, 13); break; \ - case 14: func_name(__VA_ARGS__, 14); break; \ - case 15: func_name(__VA_ARGS__, 15); break; \ - case 16: func_name(__VA_ARGS__, 16); break; \ - case 17: func_name(__VA_ARGS__, 17); break; \ - case 18: func_name(__VA_ARGS__, 18); break; \ - case 19: func_name(__VA_ARGS__, 19); break; \ - case 20: func_name(__VA_ARGS__, 20); break; \ - case 21: func_name(__VA_ARGS__, 21); break; \ - case 22: func_name(__VA_ARGS__, 22); break; \ - case 23: func_name(__VA_ARGS__, 23); break; \ - case 24: func_name(__VA_ARGS__, 24); break; \ - case 25: func_name(__VA_ARGS__, 25); break; \ - case 26: func_name(__VA_ARGS__, 26); break; \ - case 27: func_name(__VA_ARGS__, 27); break; \ - case 28: func_name(__VA_ARGS__, 28); break; \ - case 29: func_name(__VA_ARGS__, 29); break; \ - case 30: func_name(__VA_ARGS__, 30); break; \ - case 31: func_name(__VA_ARGS__, 31); break; \ - case 32: func_name(__VA_ARGS__, 32); break; \ - case 33: func_name(__VA_ARGS__, 33); break; \ - case 34: func_name(__VA_ARGS__, 34); break; \ - case 35: func_name(__VA_ARGS__, 35); break; \ - case 36: func_name(__VA_ARGS__, 36); break; \ - case 37: func_name(__VA_ARGS__, 37); break; \ - case 38: func_name(__VA_ARGS__, 38); break; \ 
- case 39: func_name(__VA_ARGS__, 39); break; \ - case 40: func_name(__VA_ARGS__, 40); break; \ - case 41: func_name(__VA_ARGS__, 41); break; \ - case 42: func_name(__VA_ARGS__, 42); break; \ - case 43: func_name(__VA_ARGS__, 43); break; \ - case 44: func_name(__VA_ARGS__, 44); break; \ - case 45: func_name(__VA_ARGS__, 45); break; \ - case 46: func_name(__VA_ARGS__, 46); break; \ - case 47: func_name(__VA_ARGS__, 47); break; \ - case 48: func_name(__VA_ARGS__, 48); break; \ - case 49: func_name(__VA_ARGS__, 49); break; \ - case 50: func_name(__VA_ARGS__, 50); break; \ - case 51: func_name(__VA_ARGS__, 51); break; \ - case 52: func_name(__VA_ARGS__, 52); break; \ - case 53: func_name(__VA_ARGS__, 53); break; \ - case 54: func_name(__VA_ARGS__, 54); break; \ - case 55: func_name(__VA_ARGS__, 55); break; \ - case 56: func_name(__VA_ARGS__, 56); break; \ - case 57: func_name(__VA_ARGS__, 57); break; \ - case 58: func_name(__VA_ARGS__, 58); break; \ - case 59: func_name(__VA_ARGS__, 59); break; \ - case 60: func_name(__VA_ARGS__, 60); break; \ - case 61: func_name(__VA_ARGS__, 61); break; \ - case 62: func_name(__VA_ARGS__, 62); break; \ - case 63: func_name(__VA_ARGS__, 63); break; \ - default: default_case; break; \ - } \ - } while (0) - -HEDLEY_DIAGNOSTIC_POP - -#endif -/* :: End simde/simde-constify.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-align.h :: */ -/* Alignment - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - ********************************************************************** - * - * This is portability layer which should help iron out some - * differences across various compilers, as well as various verisons of - * C and C++. - * - * It was originally developed for SIMD Everywhere - * (), but since its only - * dependency is Hedley (, also CC0) - * it can easily be used in other projects, so please feel free to do - * so. - * - * If you do use this in your project, please keep a link to SIMDe in - * your code to remind you where to report any bugs and/or check for - * updated versions. - * - * # API Overview - * - * The API has several parts, and most macros have a few variations. - * There are APIs for declaring aligned fields/variables, optimization - * hints, and run-time alignment checks. - * - * Briefly, macros ending with "_TO" take numeric values and are great - * when you know the value you would like to use. Macros ending with - * "_LIKE", on the other hand, accept a type and are used when you want - * to use the alignment of a type instead of hardcoding a value. - * - * Documentation for each section of the API is inline. - * - * True to form, MSVC is the main problem and imposes several - * limitations on the effectiveness of the APIs. Detailed descriptions - * of the limitations of each macro are inline, but in general: - * - * * On C11+ or C++11+ code written using this API will work. The - * ASSUME macros may or may not generate a hint to the compiler, but - * that is only an optimization issue and will not actually cause - * failures. - * * If you're using pretty much any compiler other than MSVC, - * everything should basically work as well as in C11/C++11. 
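// A minimal sketch (not SIMDe code) of the constify dispatch described earlier,
// with a C++ template parameter standing in for an operand that must be an
// integer constant expression: a switch with one case per legal immediate lets
// a run-time value reach the compile-time slot.
#include <cassert>

template <int kImm>
static int shift_left(int v) { return v << kImm; }   // kImm must be a constant

static int shift_left_dispatch(int v, int imm) {
  int result = 0;
  switch (imm) {                      // mirrors the shape of SIMDE_CONSTIFY_4_
    case 0: result = shift_left<0>(v); break;
    case 1: result = shift_left<1>(v); break;
    case 2: result = shift_left<2>(v); break;
    case 3: result = shift_left<3>(v); break;
    default: result = 0; break;
  }
  return result;
}

int main() {
  volatile int imm = 3;               // deliberately not a constant expression
  assert(shift_left_dispatch(2, imm) == 16);
  return 0;
}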
- */ - -#if !defined(SIMDE_ALIGN_H) -#define SIMDE_ALIGN_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -/* I know this seems a little silly, but some non-hosted compilers - * don't have stddef.h, so we try to accomodate them. */ -#if !defined(SIMDE_ALIGN_SIZE_T_) - #if defined(__SIZE_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__SIZE_T_TYPE__) - #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #else - #include - #define SIMDE_ALIGN_SIZE_T_ size_t - #endif -#endif - -#if !defined(SIMDE_ALIGN_INTPTR_T_) - #if defined(__INTPTR_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ - #elif defined(__PTRDIFF_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ - #elif defined(__PTRDIFF_T_TYPE__) - #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ - #elif defined(__cplusplus) - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #else - #include - #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t - #endif -#endif - -#if defined(SIMDE_ALIGN_DEBUG) - #if defined(__cplusplus) - #include - #else - #include - #endif -#endif - -/* SIMDE_ALIGN_OF(Type) - * - * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or - * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. - * It isn't defined everywhere (only when the compiler has some alignof- - * like feature we can use to implement it), but it should work in most - * modern compilers, as well as C11 and C++11. - * - * If we can't find an implementation for SIMDE_ALIGN_OF then the macro - * will not be defined, so if you can handle that situation sensibly - * you may need to sprinkle some ifdefs into your code. - */ -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (0 && HEDLEY_HAS_FEATURE(c_alignof)) - #define SIMDE_ALIGN_OF(Type) _Alignof(Type) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) - #define SIMDE_ALIGN_OF(Type) alignof(Type) -#elif \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ - HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ - HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - defined(__IBM__ALIGNOF__) || \ - defined(__clang__) - #define SIMDE_ALIGN_OF(Type) __alignof__(Type) -#elif \ - HEDLEY_IAR_VERSION_CHECK(8,40,0) - #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) -#elif \ - HEDLEY_MSVC_VERSION_CHECK(19,0,0) - /* Probably goes back much further, but MS takes down their old docs. - * If you can verify that this works in earlier versions please let - * me know! */ - #define SIMDE_ALIGN_OF(Type) __alignof(Type) -#endif - -/* SIMDE_ALIGN_MAXIMUM: - * - * This is the maximum alignment that the compiler supports. You can - * define the value prior to including SIMDe if necessary, but in that - * case *please* submit an issue so we can add the platform to the - * detection code. - * - * Most compilers are okay with types which are aligned beyond what - * they think is the maximum, as long as the alignment is a power - * of two. 
Older versions of MSVC is the exception, so we need to cap - * the alignment requests at values that the implementation supports. - * - * XL C/C++ will accept values larger than 16 (which is the alignment - * of an AltiVec vector), but will not reliably align to the larger - * value, so so we cap the value at 16 there. - * - * If the compiler accepts any power-of-two value within reason then - * this macro should be left undefined, and the SIMDE_ALIGN_CAP - * macro will just return the value passed to it. */ -#if !defined(SIMDE_ALIGN_MAXIMUM) - #if defined(HEDLEY_MSVC_VERSION) - #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) - // Visual studio 2017 and newer does not need a max - #else - #if defined(_M_IX86) || defined(_M_AMD64) - #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 - #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) - /* VS 2010 is really a guess based on Wikipedia; if anyone can - * test with old VS versions I'd really appreciate it. */ - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 - #else - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif - #elif defined(_M_ARM) || defined(_M_ARM64) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 - #endif - #endif - #elif defined(HEDLEY_IBM_VERSION) - #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 - #endif -#endif - -/* You can mostly ignore these; they're intended for internal use. - * If you do need to use them please let me know; if they fulfill - * a common use case I'll probably drop the trailing underscore - * and make them part of the public API. */ -#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) - #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 - #define SIMDE_ALIGN_64_ 32 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 - #define SIMDE_ALIGN_64_ 16 - #define SIMDE_ALIGN_32_ 16 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 - #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 - #define SIMDE_ALIGN_64_ 8 - #define SIMDE_ALIGN_32_ 8 - #define SIMDE_ALIGN_16_ 8 - #define SIMDE_ALIGN_8_ 8 - #else - #error Max alignment expected to be >= 8 - #endif -#else - #define SIMDE_ALIGN_64_ 64 - #define SIMDE_ALIGN_32_ 32 - #define SIMDE_ALIGN_16_ 16 - #define SIMDE_ALIGN_8_ 8 -#endif - -/** - * SIMDE_ALIGN_CAP(Alignment) - * - * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. - */ -#if defined(SIMDE_ALIGN_MAXIMUM) - #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) -#else - #define SIMDE_ALIGN_CAP(Alignment) (Alignment) -#endif - -/* SIMDE_ALIGN_TO(Alignment) - * - * SIMDE_ALIGN_TO is used to declare types or variables. It basically - * maps to the align attribute in most compilers, the align declspec - * in MSVC, or _Alignas/alignas in C11/C++11. - * - * Example: - * - * struct i32x4 { - * SIMDE_ALIGN_TO(16) int32_t values[4]; - * } - * - * Limitations: - * - * MSVC requires that the Alignment parameter be numeric; you can't do - * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is - * unfortunate because that's really how the LIKE macros are - * implemented, and I am not aware of a way to get anything like this - * to work without using the C11/C++11 keywords. 
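// A minimal sketch (not SIMDe code) of what SIMDE_ALIGN_TO and SIMDE_ALIGN_LIKE
// reduce to on a C++11 compiler, mirroring the i32x4 example above with the
// alignas/alignof keywords directly.
#include <cstdint>

struct i32x4 {
  alignas(16) int32_t values[4];               // SIMDE_ALIGN_TO(16)
};
static_assert(alignof(i32x4) == 16, "16-byte alignment requested");

struct i32x4_like {
  alignas(alignof(i32x4)) int32_t other[4];    // SIMDE_ALIGN_LIKE(i32x4)
};

int main() { return 0; }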
- * - * It also means that we can't use SIMDE_ALIGN_CAP to limit the - * alignment to the value specified, which MSVC also requires, so on - * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. - * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, - * but should be safe to use on MSVC. - * - * All this is to say that, if you want your code to work on MSVC, you - * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of - * SIMDE_ALIGN_TO(8/16/32/64). - */ -#if \ - HEDLEY_HAS_ATTRIBUTE(aligned) || \ - HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ - HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ - HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) - #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) -#elif \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) - #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) - #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif \ - defined(HEDLEY_MSVC_VERSION) - #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) - /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); - * the alignment passed to the declspec has to be an integer. */ - #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE -#endif -#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) -#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) -#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) -#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) - -/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) - * - * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's - * std::assume_aligned, or __builtin_assume_aligned. It tells the - * compiler to assume that the provided pointer is aligned to an - * `Alignment`-byte boundary. - * - * If you define SIMDE_ALIGN_DEBUG prior to including this header then - * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
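// A minimal sketch (not SIMDe code) of the hint SIMDE_ALIGN_ASSUME_TO wraps:
// __builtin_assume_aligned on GCC/Clang (C++20 offers std::assume_aligned).
// It is purely an optimizer hint; passing a misaligned pointer is undefined.
#include <cstddef>

static float sum_aligned16(const float* p, std::size_t n) {
  const float* q = static_cast<const float*>(__builtin_assume_aligned(p, 16));
  float s = 0.0f;
  for (std::size_t i = 0; i < n; ++i) s += q[i];
  return s;
}

int main() {
  alignas(16) float data[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  return static_cast<int>(sum_aligned16(data, 8)) - 36;   // exits with 0
}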
We don't - * integrate with NDEBUG in this header, but it may be a good idea to - * put something like this in your code: - * - * #if !defined(NDEBUG) - * #define SIMDE_ALIGN_DEBUG - * #endif - * #include <.../simde-align.h> - */ -#if \ - HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ - HEDLEY_GCC_VERSION_CHECK(4,7,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) -#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ - __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ - __assume_aligned(simde_assume_aligned_t_, Alignment); \ - simde_assume_aligned_t_; \ - })) -#elif defined(__cplusplus) && (__cplusplus > 201703L) - #include - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) -#else - #if defined(__cplusplus) - template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) - #else - HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) - #endif - { - HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); - return ptr; - } - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) - #else - #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) - #endif -#endif - -#if !defined(SIMDE_ALIGN_DEBUG) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) -#else - #include - #if defined(__cplusplus) - template - static HEDLEY_ALWAYS_INLINE - T* - simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #else - static HEDLEY_ALWAYS_INLINE - void* - simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) - #endif - { - if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { - fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", - file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), - HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), - HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); - } - - return ptr; - } - - #if defined(__cplusplus) - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) - #else - #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) - #endif -#endif - -/* SIMDE_ALIGN_LIKE(Type) - * SIMDE_ALIGN_LIKE_#(Type) - * - * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros - * except instead of an integer they take a type; basically, it's just - * a more convenient way to do something like: - * - * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - * - * The versions with a numeric suffix will fall back 
on using a numeric - * value in the event we can't use SIMDE_ALIGN_OF(Type). This is - * mainly for MSVC, where __declspec(align()) can't handle anything - * other than hard-coded numeric values. - */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) - #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) -#else - #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 - #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 - #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 - #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 -#endif - -/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) - * - * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a - * type instead of a numeric value. */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) - #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) -#endif - -/* SIMDE_ALIGN_CAST(Type, Pointer) - * - * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try - * to silence warnings that some compilers may produce if you try - * to assign to a type with increased alignment requirements. - * - * Note that it does *not* actually attempt to tell the compiler that - * the pointer is aligned like the destination should be; that's the - * job of the next macro. This macro is necessary for stupid APIs - * like _mm_loadu_si128 where the input is a __m128i* but the function - * is specifically for data which isn't necessarily aligned to - * _Alignof(__m128i). - */ -#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ - Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_r_; \ - })) -#else - #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) -#endif - -/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) - * - * This is sort of like a combination of a reinterpret_cast and a - * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell - * the compiler that the pointer is aligned like the specified type - * and casts the pointer to the specified type while suppressing any - * warnings from the compiler about casting to a type with greater - * alignment requirements. - */ -#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) - -#endif /* !defined(SIMDE_ALIGN_H) */ -/* :: End simde/simde-align.h :: */ - -/* In some situations, SIMDe has to make large performance sacrifices - * for small increases in how faithfully it reproduces an API, but - * only a relatively small number of users will actually need the API - * to be completely accurate. The SIMDE_FAST_* options can be used to - * disable these trade-offs. - * - * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or - * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to - * enable some optimizations. Using -ffast-math and/or - * -ffinite-math-only will also enable the relevant options. If you - * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) - #define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) - #if defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_NANS - #elif defined(__FINITE_MATH_ONLY__) - #if __FINITE_MATH_ONLY__ - #define SIMDE_FAST_NANS - #endif - #endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) - #define SIMDE_FAST_CONVERSION_RANGE -#endif - -/* Due to differences across platforms, sometimes it can be much - * faster for us to allow spurious floating point exceptions, - * or to no generate them when we should. 
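// A minimal scalar sketch (not SIMDe code) of the NaN divergence described
// above: a comparison-based min depends on operand order when a NaN is
// present, while std::fmin always prefers the non-NaN operand. Reconciling
// the two behaviors lane by lane is the cost that SIMDE_FAST_NANS lets the
// library skip.
#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double nan = std::numeric_limits<double>::quiet_NaN();
  assert(std::isnan(std::min(nan, 0.0)));  // (0.0 < NaN) is false -> first arg
  assert(std::min(0.0, nan) == 0.0);       // (NaN < 0.0) is false -> first arg
  assert(std::fmin(nan, 0.0) == 0.0);      // fmin returns the non-NaN operand
  assert(std::fmin(0.0, nan) == 0.0);
  return 0;
}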
[Remainder of the vendored SIMDe amalgamation omitted: the deletion continues through simde/simde-common.h (including the inlined simde/check.h and simde/debug-trap.h) and into simde/x86/mmx.h, removing the third-party SIMD-emulation headers in their entirety.]
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtm64_si64 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtm64_si64(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i64[0]; - #endif - #endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -# define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi32_si64 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[2] = { a, 0 }; - r_.neon_i32 = vld1_s32(av); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -# define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtsi64_m64 (int64_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) - return _mm_cvtsi64_m64(a); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); - #else - r_.i64[0] = a; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) -# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi64_si32 (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_empty (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); - #else - /* noop */ - #endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_empty() simde_mm_empty() -# define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_or_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - r_.i64[0] = a_.i64[0] | b_.i64[0]; 
- #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -# define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to 
UINT8_MAX */ - const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); - - /* Elements which are within the acceptable range */ - const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); - const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); - - /* Final values as 16-bit integers */ - const int16x8_t values = vorrq_s16(le_max, gt_max); - - r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (a_.i16[i] > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else if (a_.i16[i] < 0) { - r_.u8[i] = 0; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] > UINT8_MAX) { - r_.u8[i + 4] = UINT8_MAX; - } else if (b_.i16[i] < 0) { - r_.u8[i + 4] = 0; - } else { - r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) -# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_i8 = vld1_s8(v); - #else - r_.i8[0] = e0; - r_.i8[1] = e1; - r_.i8[2] = e2; - r_.i8[3] = e3; - r_.i8[4] = e4; - r_.i8[5] = e5; - r_.i8[6] = e6; - r_.i8[7] = e7; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m64_private r_; - - #if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi8( - HEDLEY_STATIC_CAST(int8_t, e7), - HEDLEY_STATIC_CAST(int8_t, e6), - HEDLEY_STATIC_CAST(int8_t, e5), - HEDLEY_STATIC_CAST(int8_t, e4), - HEDLEY_STATIC_CAST(int8_t, e3), - HEDLEY_STATIC_CAST(int8_t, e2), - HEDLEY_STATIC_CAST(int8_t, e1), - HEDLEY_STATIC_CAST(int8_t, e0)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; - r_.neon_u8 = vld1_u8(v); - #else - r_.u8[0] = e0; - r_.u8[1] = e1; - r_.u8[2] = e2; - r_.u8[3] = e3; - r_.u8[4] = e4; - r_.u8[5] = e5; - r_.u8[6] = e6; - r_.u8[7] = e7; - #endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi16(e3, e2, e1, e0); - #else - simde__m64_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; - r_.neon_i16 = vld1_s16(v); - #else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - #endif - - return 
simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi16( - HEDLEY_STATIC_CAST(int16_t, e3), - HEDLEY_STATIC_CAST(int16_t, e2), - HEDLEY_STATIC_CAST(int16_t, e1), - HEDLEY_STATIC_CAST(int16_t, e0) - ); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; - r_.neon_u16 = vld1_u16(v); -#else - r_.u16[0] = e0; - r_.u16[1] = e1; - r_.u16[2] = e2; - r_.u16[3] = e3; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32( - HEDLEY_STATIC_CAST(int32_t, e1), - HEDLEY_STATIC_CAST(int32_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; - r_.neon_u32 = vld1_u32(v); -#else - r_.u32[0] = e0; - r_.u32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set_pi32 (int32_t e1, int32_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32(e1, e0); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; - r_.neon_i32 = vld1_s32(v); -#else - r_.i32[0] = e0; - r_.i32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_pi64 (int64_t e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; - r_.neon_i64 = vld1_s64(v); -#else - r_.i64[0] = e0; -#endif - - return simde__m64_from_private(r_); -} - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; - r_.neon_f32 = vld1_f32(v); -#else - r_.f32[0] = e0; - r_.f32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi8 (int8_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi8(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i8 = vmov_n_s8(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi8(a, a, a, a, a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi16 (int16_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi16(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i16 = vmov_n_s16(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi16(a, a, a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_set1_pi32 (int32_t a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi32(a); - #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i32 = vmov_n_s32(a); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(a, a); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); - #else - return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi16(e3, e2, e1, e0); - #else - return simde_mm_set_pi16(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setr_pi32 (int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi32(e1, e0); - #else - return simde_mm_set_pi32(e0, e1); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_setzero_si64 (void) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setzero_si64(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_u32 = vmov_n_u32(0); - return simde__m64_from_private(r_); - #else - return simde_mm_set_pi32(0, 0); - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_si64() simde_mm_setzero_si64() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_load_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_loadu_si64 (const void* mem_addr) { - simde__m64 r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { - simde_memcpy(mem_addr, &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_x_mm_setone_si64 (void) { - return simde_mm_set1_pi32(~INT32_C(0)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) 
- return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) -# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count_.u64[0]; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) -# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psllh_s(a_.mmi_i16, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) -# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi32(a, count); - #else - simde__m64_private r_; - 
simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) -# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_slli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_slli_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); - #else - r_.u64[0] = a_.u64[0] << count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) -# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 << count_.i64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] << count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) -# define _m_psllq(a, count) simde_mm_sll_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) - return simde_mm_setzero_si64(); - - r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { - r_.u16[i] = a_.u16[i] >> count_.u64[0]; - } - 
#endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) -# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count_.u64[0]; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { - r_.u32[i] = a_.u32[i] >> count_.u64[0]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) -# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) -# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> count; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) -# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srli_si64 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_si64(a, count); 
- #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> count; - #else - r_.u64[0] = a_.u64[0] >> count; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) -# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_si64(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 >> count_.u64; - #else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] >> count_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) -# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi16 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrah_s(a_.mmi_i16, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) -# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_srai_pi32 (simde__m64 a, int count) { - #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> (count & 0xff); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psraw_s(a_.mmi_i32, count); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> (count & 0xff); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) 
-# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi16(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) -# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi32(a, count); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int32_t cnt = (count_.u64[0] > 31) ? 31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> cnt; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) -# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) -# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); - #elif 
defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) -# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) -# define _m_psubd(a, b) simde_mm_sub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { - r_.i8[i] = INT8_MIN; - } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) -# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - const int32_t x = a_.u8[i] - b_.u8[i]; - if (x < 0) { - r_.u8[i] = 0; - } else if (x > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) -# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { - r_.i16[i] = SHRT_MIN; - } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) -# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - const int x = a_.u16[i] - b_.u16[i]; - if (x < 0) { - r_.u16[i] = 0; - } else if (x > UINT16_MAX) { - r_.u16[i] = UINT16_MAX; - } else { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) -# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); - #else - r_.i8[0] = a_.i8[4]; - r_.i8[1] = b_.i8[4]; - r_.i8[2] = a_.i8[5]; - r_.i8[3] = b_.i8[5]; - r_.i8[4] = a_.i8[6]; - r_.i8[5] = b_.i8[6]; - r_.i8[6] = a_.i8[7]; - r_.i8[7] = b_.i8[7]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) -# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); - #else - r_.i16[0] = a_.i16[2]; - r_.i16[1] = b_.i16[2]; - r_.i16[2] = a_.i16[3]; - r_.i16[3] = b_.i16[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) -# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); - #else - r_.i32[0] = a_.i32[1]; - r_.i32[1] = b_.i32[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) -# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi8(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); - #else - r_.i8[0] = a_.i8[0]; - r_.i8[1] = b_.i8[0]; - r_.i8[2] = a_.i8[1]; - r_.i8[3] = b_.i8[1]; - r_.i8[4] = a_.i8[2]; - r_.i8[5] = b_.i8[2]; - r_.i8[6] = a_.i8[3]; - r_.i8[7] = b_.i8[3]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) -# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi16(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = 
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); - #else - r_.i16[0] = a_.i16[0]; - r_.i16[1] = b_.i16[0]; - r_.i16[2] = a_.i16[1]; - r_.i16[3] = b_.i16[1]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) -# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi32(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); - #else - r_.i32[0] = a_.i32[0]; - r_.i32[1] = b_.i32[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) -# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _mm_xor_si64(a, b); - #else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - r_.u64[0] = a_.u64[0] ^ b_.u64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) -# define _m_pxor(a, b) simde_mm_xor_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_m_to_int (simde__m64 a) { - #if defined(SIMDE_X86_MMX_NATIVE) - return _m_to_int(a); - #else - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -# define _m_to_int(a) simde_m_to_int(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_MMX_H) */ -/* :: End simde/x86/mmx.h :: */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/simde-f16.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do 
so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2021 Evan Nemerson - */ - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -#if !defined(SIMDE_FLOAT16_H) -#define SIMDE_FLOAT16_H - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* Portable version which should work on pretty much any compiler. - * Obviously you can't rely on compiler support for things like - * conversion to/from 32-bit floats, so make sure you always use the - * functions and macros in this file! - * - * The portable implementations are (heavily) based on CC0 code by - * Fabian Giesen: (see also - * ). - * I have basically just modified it to get rid of some UB (lots of - * aliasing, right shifting a negative value), use fixed-width types, - * and work in C. */ -#define SIMDE_FLOAT16_API_PORTABLE 1 -/* _Float16, per C standard (TS 18661-3; - * ). */ -#define SIMDE_FLOAT16_API_FLOAT16 2 -/* clang >= 6.0 supports __fp16 as an interchange format on all - * targets, but only allows you to use them for arguments and return - * values on targets which have defined an ABI. We get around the - * restriction by wrapping the __fp16 in a struct, but we can't do - * that on Arm since it would break compatibility with the NEON F16 - * functions. */ -#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 -/* This is basically __fp16 as specified by Arm, where arugments and - * return values are raw __fp16 values not structs. */ -#define SIMDE_FLOAT16_API_FP16 4 - -/* Choosing an implementation. This is a bit rough, but I don't have - * any ideas on how to improve it. If you do, patches are definitely - * welcome. */ -#if !defined(SIMDE_FLOAT16_API) - #if defined(__ARM_FP16_FORMAT_IEEE) && (defined(SIMDE_ARM_NEON_FP16) || defined(__ARM_FP16_ARGS)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 - #elif !defined(__EMSCRIPTEN__) && !(defined(__clang__) && defined(SIMDE_ARCH_POWER)) && \ - !(defined(HEDLEY_MSVC_VERSION) && defined(__clang__)) && \ - !(defined(SIMDE_ARCH_MIPS) && defined(__clang__)) && \ - !(defined(__clang__) && defined(SIMDE_ARCH_RISCV64)) && ( \ - defined(SIMDE_X86_AVX512FP16_NATIVE) || \ - (defined(SIMDE_ARCH_X86_SSE2) && HEDLEY_GCC_VERSION_CHECK(12,0,0)) || \ - (defined(SIMDE_ARCH_AARCH64) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !defined(__cplusplus)) || \ - ((defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(15,0,0)) || \ - (!(defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)) && SIMDE_DETECT_CLANG_VERSION_CHECK(6,0,0))) - /* We haven't found a better way to detect this. 
It seems like defining - * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then - * checking for defined(FLT16_MAX) should work, but both gcc and - * clang will define the constants even if _Float16 is not - * supported. Ideas welcome. */ - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 - #elif defined(__FLT16_MIN__) && \ - (defined(__clang__) && \ - (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) \ - && !defined(SIMDE_ARCH_RISCV64)) - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI - #else - #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE - #endif -#endif - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 - typedef _Float16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #if !defined(__cplusplus) - #define SIMDE_FLOAT16_C(value) value##f16 - #else - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(_Float16, (value)) - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - typedef struct { __fp16 value; } simde_float16; - #if defined(SIMDE_STATEMENT_EXPR_) && !defined(SIMDE_TESTS_H) - #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) - #else - #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) - #define SIMDE_FLOAT16_IS_SCALAR 1 - #endif -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 - typedef __fp16 simde_float16; - #define SIMDE_FLOAT16_IS_SCALAR 1 - #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) -#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - typedef struct { uint16_t value; } simde_float16; -#else - #error No 16-bit floating point API. -#endif - -#if \ - defined(SIMDE_VECTOR_OPS) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ - (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) - #define SIMDE_FLOAT16_VECTOR -#endif - -/* Reinterpret -- you *generally* shouldn't need these, they're really - * intended for internal use. However, on x86 half-precision floats - * get stuffed into a __m128i/__m256i, so it may be useful. 
*/ - -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) -SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) - -#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE - #define SIMDE_NANHF simde_uint16_as_float16(0x7E00) // a quiet Not-a-Number - #define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) - #define SIMDE_NINFINITYHF simde_uint16_as_float16(0xFC00) -#else - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF SIMDE_FLOAT16_C(__builtin_nanf16("")) - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_FLOAT16_C(SIMDE_MATH_NAN) - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(__builtin_inf16()) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-__builtin_inf16()) - #else - #define SIMDE_INFINITYHF SIMDE_FLOAT16_C(SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF SIMDE_FLOAT16_C(-SIMDE_MATH_INFINITY) - #endif - #else - #if SIMDE_MATH_BUILTIN_LIBM(nanf16) - #define SIMDE_NANHF __builtin_nanf16("") - #elif defined(SIMDE_MATH_NAN) - #define SIMDE_NANHF SIMDE_MATH_NAN - #endif - #if SIMDE_MATH_BUILTIN_LIBM(inf16) - #define SIMDE_INFINITYHF __builtin_inf16() - #define SIMDE_NINFINITYHF -(__builtin_inf16()) - #else - #define SIMDE_INFINITYHF HEDLEY_STATIC_CAST(simde_float16, SIMDE_MATH_INFINITY) - #define SIMDE_NINFINITYHF HEDLEY_STATIC_CAST(simde_float16, -SIMDE_MATH_INFINITY) - #endif - #endif -#endif - -/* Conversion -- convert between single-precision and half-precision - * floats. */ -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float16 -simde_float16_from_float32 (simde_float32 value) { - simde_float16 res; - - #if \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) - res = HEDLEY_STATIC_CAST(simde_float16, value); - #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) - res.value = HEDLEY_STATIC_CAST(__fp16, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint32_t f32u = simde_float32_as_uint32(value); - static const uint32_t f32u_infty = UINT32_C(255) << 23; - static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; - static const uint32_t denorm_magic = - ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; - uint16_t f16u; - - uint32_t sign = f32u & (UINT32_C(1) << 31); - f32u ^= sign; - - /* NOTE all the integer compares in this function cast the operands - * to signed values to help compilers vectorize to SSE2, which lacks - * unsigned comparison instructions. This is fine since all - * operands are below 0x80000000 (we clear the sign bit). */ - - if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ - f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ - } else { /* (De)normalized number or zero */ - if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ - /* use a magic value to align our 10 mantissa bits at the bottom of - * the float. as long as FP addition is round-to-nearest-even this - * just works. */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); - - /* and one integer subtract of the bias later, we have our final float! 
*/ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); - } else { - uint32_t mant_odd = (f32u >> 13) & 1; - - /* update exponent, rounding bias part 1 */ - f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); - /* rounding bias part 2 */ - f32u += mant_odd; - /* take the bits! */ - f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); - } - } - - f16u |= sign >> 16; - res = simde_uint16_as_float16(f16u); - #endif - - return res; -} - -static HEDLEY_ALWAYS_INLINE HEDLEY_CONST -simde_float32 -simde_float16_to_float32 (simde_float16 value) { - simde_float32 res; - - #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) - res = HEDLEY_STATIC_CAST(simde_float32, value); - #else - /* This code is CC0, based heavily on code by Fabian Giesen. */ - uint16_t half = simde_float16_as_uint16(value); - const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); - const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ - uint32_t f32u; - - f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ - uint32_t exp = shifted_exp & f32u; /* just the exponent */ - f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ - - /* handle exponent special cases */ - if (exp == shifted_exp) /* Inf/NaN? */ - f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ - else if (exp == 0) { /* Zero/Denormal? */ - f32u += (1) << 23; /* extra exp adjust */ - f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ - } - - f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ - res = simde_uint32_as_float32(f32u); - #endif - - return res; -} - -#ifdef SIMDE_FLOAT16_C - #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) -#else - #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) -#endif - -#if !defined(simde_isinfhf) && defined(simde_math_isinff) - #define simde_isinfhf(a) simde_math_isinff(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnanhf) && defined(simde_math_isnanf) - #define simde_isnanhf(a) simde_math_isnanf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_isnormalhf) && defined(simde_math_isnormalf) - #define simde_isnormalhf(a) simde_math_isnormalf(simde_float16_to_float32(a)) -#endif -#if !defined(simde_issubnormalhf) && defined(simde_math_issubnormalf) - #define simde_issubnormalhf(a) simde_math_issubnormalf(simde_float16_to_float32(a)) -#endif - -#define simde_fpclassifyhf(a) simde_math_fpclassifyf(simde_float16_to_float32(a)) - -static HEDLEY_INLINE -uint8_t -simde_fpclasshf(simde_float16 v, const int imm8) { - uint16_t bits = simde_float16_as_uint16(v); - uint8_t negative = (bits >> 15) & 1; - uint16_t const ExpMask = 0x7C00; // [14:10] - uint16_t const MantMask = 0x03FF; // [9:0] - uint8_t exponent_all_ones = ((bits & ExpMask) == ExpMask); - uint8_t exponent_all_zeros = ((bits & ExpMask) == 0); - uint8_t mantissa_all_zeros = ((bits & MantMask) == 0); - uint8_t zero = exponent_all_zeros & mantissa_all_zeros; - uint8_t signaling_bit = (bits >> 9) & 1; - - uint8_t result = 0; - uint8_t snan = exponent_all_ones & (!mantissa_all_zeros) & (!signaling_bit); - uint8_t qnan = exponent_all_ones & (!mantissa_all_zeros) & signaling_bit; - uint8_t positive_zero = (!negative) & zero; - uint8_t negative_zero = negative & zero; - uint8_t positive_infinity = (!negative) & exponent_all_ones & mantissa_all_zeros; - uint8_t negative_infinity = negative & exponent_all_ones & mantissa_all_zeros; - uint8_t 
denormal = exponent_all_zeros & (!mantissa_all_zeros); - uint8_t finite_negative = negative & (!exponent_all_ones) & (!zero); - result = (((imm8 >> 0) & qnan) | \ - ((imm8 >> 1) & positive_zero) | \ - ((imm8 >> 2) & negative_zero) | \ - ((imm8 >> 3) & positive_infinity) | \ - ((imm8 >> 4) & negative_infinity) | \ - ((imm8 >> 5) & denormal) | \ - ((imm8 >> 6) & finite_negative) | \ - ((imm8 >> 7) & snan)); - return result; -} - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_FLOAT16_H) */ -/* :: End simde/simde-f16.h :: */ - -#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) - #define NOMINMAX - #include -#endif - -#if defined(__ARM_ACLE) - #include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_ALIGN_TO_16 __m128 n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v16i8 lsx_i8; - v8i16 lsx_i16; - v4i32 lsx_i32; - v2i64 lsx_i64; - v16u8 lsx_u8; - v8u16 lsx_u16; - v4u32 lsx_u32; - v2u64 lsx_u64; - v4f32 lsx_f32; - v2f64 lsx_f64; - #endif -} simde__m128_private; - -#if defined(SIMDE_X86_SSE_NATIVE) - typedef __m128 simde__m128; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef float32x4_t simde__m128; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; -#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - typedef v4f32 simde__m128; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128_private simde__m128; -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - typedef simde__m128 __m128; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde__m128_from_private(simde__m128_private v) { - simde__m128 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128_private -simde__m128_to_private(simde__m128 v) { - simde__m128_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(float) - simde__m128_to_altivec_f32(simde__m128 value) { - simde__m128_private r_ = simde__m128_to_private(value); - return r_.altivec_f32; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128 - simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { - simde__m128_private r_; - r_.altivec_f32 = value; - return simde__m128_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) - #endif - - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); -#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ - -#if defined(SIMDE_LOONGARCH_LSX_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) -#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ - -enum { - #if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, - SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, - SIMDE_MM_ROUND_UP = _MM_ROUND_UP, - SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO - #else - SIMDE_MM_ROUND_NEAREST = 0x0000, - SIMDE_MM_ROUND_DOWN = 0x2000, - SIMDE_MM_ROUND_UP = 0x4000, - SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 - #endif -}; -#if defined(_MM_ROUND_MASK) -# define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK -#else -# define SIMDE_MM_ROUND_MASK (0x6000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK -#endif - -#if defined(_MM_FROUND_TO_NEAREST_INT) -# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT -# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF -# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF -# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO -# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION - -# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC -# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC -#else -# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 -# define 
SIMDE_MM_FROUND_TO_NEG_INF 0x01 -# define SIMDE_MM_FROUND_TO_POS_INF 0x02 -# define SIMDE_MM_FROUND_TO_ZERO 0x03 -# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 - -# define SIMDE_MM_FROUND_RAISE_EXC 0x00 -# define SIMDE_MM_FROUND_NO_EXC 0x08 -#endif - -#define SIMDE_MM_FROUND_NINT \ - (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_FLOOR \ - (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_CEIL \ - (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_TRUNC \ - (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_RINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_NEARBYINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) - -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) -# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT -# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF -# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF -# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO -# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION -# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC -# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT -# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR -# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL -# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC -# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT -# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT -#endif - -#if defined(_MM_EXCEPT_INVALID) -# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID -#else -# define SIMDE_MM_EXCEPT_INVALID (0x0001) -#endif -#if defined(_MM_EXCEPT_DENORM) -# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM -#else -# define SIMDE_MM_EXCEPT_DENORM (0x0002) -#endif -#if defined(_MM_EXCEPT_DIV_ZERO) -# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO -#else -# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) -#endif -#if defined(_MM_EXCEPT_OVERFLOW) -# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW -#else -# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) -#endif -#if defined(_MM_EXCEPT_UNDERFLOW) -# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW -#else -# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) -#endif -#if defined(_MM_EXCEPT_INEXACT) -# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT -#else -# define SIMDE_MM_EXCEPT_INEXACT (0x0020) -#endif -#if defined(_MM_EXCEPT_MASK) -# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK -#else -# define SIMDE_MM_EXCEPT_MASK \ - (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ - SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ - SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID - #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM - #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO - #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW - #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW - #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT - #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK -#endif - -#if defined(_MM_MASK_INVALID) -# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID -#else -# define SIMDE_MM_MASK_INVALID (0x0080) -#endif -#if defined(_MM_MASK_DENORM) -# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM -#else -# define SIMDE_MM_MASK_DENORM (0x0100) -#endif -#if defined(_MM_MASK_DIV_ZERO) -# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO -#else -# define 
SIMDE_MM_MASK_DIV_ZERO (0x0200) -#endif -#if defined(_MM_MASK_OVERFLOW) -# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW -#else -# define SIMDE_MM_MASK_OVERFLOW (0x0400) -#endif -#if defined(_MM_MASK_UNDERFLOW) -# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW -#else -# define SIMDE_MM_MASK_UNDERFLOW (0x0800) -#endif -#if defined(_MM_MASK_INEXACT) -# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT -#else -# define SIMDE_MM_MASK_INEXACT (0x1000) -#endif -#if defined(_MM_MASK_MASK) -# define SIMDE_MM_MASK_MASK _MM_MASK_MASK -#else -# define SIMDE_MM_MASK_MASK \ - (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ - SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ - SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID - #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM - #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO - #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW - #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW - #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT - #define _MM_MASK_MASK SIMDE_MM_MASK_MASK -#endif - -#if defined(_MM_FLUSH_ZERO_MASK) -# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK -#else -# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_ON) -# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON -#else -# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_OFF) -# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF -#else -# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK - #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON - #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_ROUNDING_MODE(void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _MM_GET_ROUNDING_MODE(); - #elif defined(SIMDE_HAVE_FENV_H) - unsigned int vfe_mode; - - switch (fegetround()) { - #if defined(FE_TONEAREST) - case FE_TONEAREST: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case FE_TOWARDZERO: - vfe_mode = SIMDE_MM_ROUND_DOWN; - break; - #endif - - #if defined(FE_UPWARD) - case FE_UPWARD: - vfe_mode = SIMDE_MM_ROUND_UP; - break; - #endif - - #if defined(FE_DOWNWARD) - case FE_DOWNWARD: - vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; - break; - #endif - - default: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - } - - return vfe_mode; - #else - return SIMDE_MM_ROUND_NEAREST; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_ROUNDING_MODE(a); - #elif defined(SIMDE_HAVE_FENV_H) - int fe_mode = FE_TONEAREST; - - switch (a) { - #if defined(FE_TONEAREST) - case SIMDE_MM_ROUND_NEAREST: - fe_mode = FE_TONEAREST; - break; - #endif - - #if defined(FE_TOWARDZERO) - case SIMDE_MM_ROUND_TOWARD_ZERO: - fe_mode = FE_TOWARDZERO; - break; - #endif - - #if defined(FE_DOWNWARD) - case SIMDE_MM_ROUND_DOWN: - fe_mode = FE_DOWNWARD; - break; - #endif - - #if defined(FE_UPWARD) - case SIMDE_MM_ROUND_UP: - fe_mode = FE_UPWARD; - break; - #endif - - default: - return; - } - - fesetround(fe_mode); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -uint32_t -SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; - #else - return SIMDE_MM_FLUSH_ZERO_OFF; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_FLUSH_ZERO_MODE(a); - #else - (void) a; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t -simde_mm_getcsr (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); - #else - return SIMDE_MM_GET_ROUNDING_MODE(); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_setcsr (uint32_t a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); - #else - SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK)); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - (void) lax_rounding; - - /* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. 
*/ - #if \ - defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_f32 = vrndiq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndnq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); - #elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndmq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); - #elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndpq_f32(a_.neon_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); - #elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndq_f32(a_.neon_f32); 
- #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); - #elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) -#else - #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps(e3, e2, e1, e0); - #else - simde__m128_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; - r_.neon_f32 = vld1q_f32(data); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ps1 (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps1(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - (void) a; - return vec_splats(a); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - return (simde__m128)__lsx_vldrepl_w(&a, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_splat(a); - #else - return simde_mm_set_ps(a, a, a, a); - #endif -} -#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ps1(a) simde_mm_set_ps1(a) -# define _mm_set1_ps(a) simde_mm_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_move_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_move_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; - r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); - #else - r_.f32[0] = b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_broadcastlow_ps(simde__m128 a) { - /* This function broadcasts the first element in the inpu vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_ss functions since there may be garbage in the upper lanes. */ - - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_shuffle_ps(a, a, 0); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_add_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - // the upper values in the result must be the remnants of . 
- r_.neon_f32 = vaddq_f32(a_.neon_f32, value); - #else - r_.f32[0] = a_.f32[0] + b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_and_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_and_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_andnot_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_xor_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_xor_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_or_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_or_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_not_ps(simde__m128 a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* Note: we use ints instead of floats because we don't want cmpeq - * to return false for (NaN, NaN) */ - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ~(a_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - mask_ = simde__m128_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint32_t wa SIMDE_VECTOR(16); - uint32_t wb SIMDE_VECTOR(16); - uint32_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) -# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) - uint16_t wa SIMDE_VECTOR(16); - uint16_t wb SIMDE_VECTOR(16); - uint16_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) -# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_abs_ps(simde__m128 a) { - #if 
[... deletion of the vendored SIMDE SSE compatibility header continues. The removed hunk carries the portable fallback implementations, and their _mm_* native-alias macros, for: the packed and scalar comparisons (_mm_cmp{eq,ge,gt,le,lt,neq,nge,ngt,nle,nlt,ord,unord}_{ps,ss}), the ordered scalar comparisons (_mm_comi{eq,ge,gt,le,lt,neq}_ss), the sign helpers simde_x_mm_copysign_ps and simde_x_mm_xorsign_ps, the float/integer conversion family (_mm_cvt_pi2ps, _mm_cvt_ps2pi, _mm_cvt_si2ss, _mm_cvt_ss2si, _mm_cvtpi8/pi16/pi32/pi32x2/pu8/pu16_ps, _mm_cvtps_pi8/pi16/pi32, _mm_cvtsi32_ss, _mm_cvtsi64_ss, _mm_cvtss_f32/si32/si64, and the truncating _mm_cvtt* variants), _mm_extract_pi16 and _mm_insert_pi16, the load family (_mm_load_ps, _mm_load1_ps, _mm_load_ss, _mm_loadh_pi, _mm_loadl_pi, _mm_loadr_ps, _mm_loadu_ps), _mm_maskmove_si64, the min/max family (_mm_max_pi16/_ps/_pu8/_ss, _mm_min_pi16/_ps/_pu8/_ss), _mm_movehl_ps and _mm_movelh_ps, and _mm_movemask_pi8 / _mm_movemask_ps. Each removed function dispatches to native SSE when available and otherwise to NEON, WASM SIMD128, AltiVec/VSX, z/Arch vector, LoongArch LSX, or a plain scalar loop ...]
- return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); - return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); - r = __lsx_vpickve2gr_wu(t64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mul_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { - #if 
defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) - #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) - #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) - #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) - #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) - #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) - #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) - #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) - #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else - #define SIMDE_MM_HINT_NTA 0 - #define SIMDE_MM_HINT_T0 1 - #define SIMDE_MM_HINT_T1 2 - #define SIMDE_MM_HINT_T2 3 - #define SIMDE_MM_HINT_ENTA 4 - #define SIMDE_MM_HINT_ET0 5 - #define SIMDE_MM_HINT_ET1 6 - #define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") - #endif - #undef _MM_HINT_NTA - #define _MM_HINT_NTA SIMDE_MM_HINT_NTA - #undef _MM_HINT_T0 - #define _MM_HINT_T0 SIMDE_MM_HINT_T0 - #undef _MM_HINT_T1 - #define _MM_HINT_T1 SIMDE_MM_HINT_T1 - #undef _MM_HINT_T2 - #define _MM_HINT_T2 SIMDE_MM_HINT_T2 - #undef _MM_HINT_ENTA - #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA - #undef _MM_HINT_ET0 - #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 - #undef _MM_HINT_ET1 - #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_prefetch (const void* p, int i) { - #if \ - HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __builtin_prefetch(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __builtin_prefetch(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __builtin_prefetch(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __builtin_prefetch(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __builtin_prefetch(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __builtin_prefetch(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __builtin_prefetch(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __builtin_prefetch(p, 0, 1); - break; - } - #elif defined(__ARM_ACLE) - #if (__ARM_ACLE >= 101) - switch(i) { - case SIMDE_MM_HINT_NTA: - __pldx(0, 0, 1, p); - break; - case SIMDE_MM_HINT_T0: - __pldx(0, 0, 0, p); - break; - case SIMDE_MM_HINT_T1: - __pldx(0, 1, 0, p); - break; - case SIMDE_MM_HINT_T2: - __pldx(0, 2, 0, p); - break; - 
case SIMDE_MM_HINT_ENTA: - __pldx(1, 0, 1, p); - break; - case SIMDE_MM_HINT_ET0: - __pldx(1, 0, 0, p); - break; - case SIMDE_MM_HINT_ET1: - __pldx(1, 1, 0, p); - break; - case SIMDE_MM_HINT_ET2: - __pldx(1, 2, 0, p); - break; - } - #else - (void) i; - __pld(p) - #endif - #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) - (void) i; - #pragma mem prefetch p - #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) - switch (i) { - case SIMDE_MM_HINT_NTA: - #pragma _CRI prefetch (nt) p - break; - case SIMDE_MM_HINT_T0: - case SIMDE_MM_HINT_T1: - case SIMDE_MM_HINT_T2: - #pragma _CRI prefetch p - break; - case SIMDE_MM_HINT_ENTA: - #pragma _CRI prefetch (write, nt) p - break; - case SIMDE_MM_HINT_ET0: - case SIMDE_MM_HINT_ET1: - case SIMDE_MM_HINT_ET2: - #pragma _CRI prefetch (write) p - break; - } - #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) - switch(i) { - case SIMDE_MM_HINT_NTA: - __prefetch_by_load(p, 0, 0); - break; - case SIMDE_MM_HINT_T0: - __prefetch_by_load(p, 0, 3); - break; - case SIMDE_MM_HINT_T1: - __prefetch_by_load(p, 0, 2); - break; - case SIMDE_MM_HINT_T2: - __prefetch_by_load(p, 0, 1); - break; - case SIMDE_MM_HINT_ENTA: - __prefetch_by_load(p, 1, 0); - break; - case SIMDE_MM_HINT_ET0: - __prefetch_by_load(p, 1, 3); - break; - case SIMDE_MM_HINT_ET1: - __prefetch_by_load(p, 1, 2); - break; - case SIMDE_MM_HINT_ET2: - __prefetch_by_load(p, 0, 1); - break; - } - #elif HEDLEY_MSVC_VERSION - (void) i; - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ - #define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) - #else - #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) - #endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) - #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_negate_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; - r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); - #elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - - #if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } - #endif - - r_.neon_f32 = recip; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); - #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rcp_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_rsqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); - #elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); - #else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; - #endif - } - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 
-simde_mm_rsqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); - #elif defined(SIMDE_IEEE754_STORAGE) - { - #if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); - #else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - - #if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); - #else - ix = INT32_C(0x5F37599E) - (ix >> 1); - #endif - - simde_memcpy(&x, &ix, sizeof(x)); - - #if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - #endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; - #endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); - r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); - #else - uint16_t sum = 0; - - SIMDE_VECTORIZE_REDUCTION(+:sum) - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - #endif - - return simde__m64_from_private(r_); - #endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_set_ss (simde_float32 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); - #else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); - #else - return simde_mm_set_ps(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setr_ps(e3, 
e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_setzero_ps (void) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); - #else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_undefined_ps (void) { - simde__m128_private r_; - - #if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); - #endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_x_mm_setone_ps (void) { - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_sfence (void) { - /* TODO: Use Hedley. */ - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) - #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); - #else - atomic_thread_fence(memory_order_seq_cst); - #endif - #elif defined(_MSC_VER) - MemoryBarrier(); - #elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); - #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); - #elif defined(_OPENMP) - #pragma omp critical(simde_mm_sfence_) - { } - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ - const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 8, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3)) }); })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { - r_.i16[i] = a_.i16[(imm8 
>> (i * 2)) & 3]; - } - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -# pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); -HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ - wasm_i32x4_shuffle( \ - simde__m128_to_private(a).wasm_v128, \ - simde__m128_to_private(b).wasm_v128, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ - float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ - float32x4_t simde_mm_shuffle_ps_r_; \ - \ - simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ - simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ - vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ - simde__m128_from_private((simde__m128_private) { .f32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ps (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sqrt_ss (simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); - #elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #else - simde_memcpy(mem_addr, &a_, sizeof(a)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { - simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { - mem_addr_[i] = a_.f32[0]; - } - #endif - 
#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - *mem_addr = a_.f32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private a_ = simde__m128_to_private(a); - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); - #else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE_ALIGNED(mem_addr:16) - for (size_t i = 0 ; i < sizeof(a_.f32) / 
sizeof(a_.f32[0]) ; i++) { - mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); - #else - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - __lsx_vst(a_.lsx_f32, mem_addr, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); - #else - simde_memcpy(mem_addr, &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_sub_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); - #endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, 
b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] == b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] == b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomige_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] >= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] >= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomigt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] > b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] > b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomile_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif 
defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] <= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] <= b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomilt_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] < b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] < b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomineq_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); - #elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] != b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); - #else - r = a_.f32[0] != b_.f32[0]; - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) -# if defined(__has_builtin) -# if __has_builtin(__builtin_ia32_undef128) -# define SIMDE_HAVE_UNDEFINED128 -# endif -# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) -# define SIMDE_HAVE_UNDEFINED128 -# endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpackhi_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_high_f32(a_.neon_f32); - float32x2_t b1 = vget_high_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = 
__lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); - #else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpacklo_ps(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); - #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) - r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_low_f32(a_.neon_f32); - float32x2_t b1 = vget_low_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); - #else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { - #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) || \ - defined(SIMDE_VECTOR_SUBSCRIPT)) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private a_ = simde__m64_to_private(a); - vst1_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), a_.neon_i64); - #else - simde__m64_private* - dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), - a_ = simde__m64_to_private(a); - - dest->i64[0] = a_.i64[0]; - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { - #if defined(SIMDE_X86_SSE_NATIVE) - _mm_stream_ps(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) || defined(SIMDE_VECTOR_SUBSCRIPT) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || defined(SIMDE_LOONGARCH_LSX_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_ASSUME_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_ps(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_stream_ps(mem_addr, a) 
simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ - float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ - row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ - row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ - vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ - } while (0) -#else - #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ - SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ - SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ - row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ - row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ - row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ - row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ - } while (0) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE_H) */ -/* :: End simde/x86/sse.h :: */ -#if !defined(SIMDE_X86_AVX_H) -#define SIMDE_X86_AVX_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* :: Begin simde/x86/sse4.2.h :: */ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- *
- * Copyright:
- *   2017      Evan Nemerson
- *   2020      Hidayat Khan
- */
-
-#if !defined(SIMDE_X86_SSE4_2_H)
-#define SIMDE_X86_SSE4_2_H
-
-/* :: Begin simde/x86/sse4.1.h :: */
-/* SPDX-License-Identifier: MIT
- * Copyright: 2017-2020 Evan Nemerson
- */
-
-#if !defined(SIMDE_X86_SSE4_1_H)
-#define SIMDE_X86_SSE4_1_H
-
-/* :: Begin simde/x86/ssse3.h :: */
-/* SPDX-License-Identifier: MIT
- * Copyright: 2017-2020 Evan Nemerson
- */
-
-#if !defined(SIMDE_X86_SSSE3_H)
-#define SIMDE_X86_SSSE3_H
-
-/* :: Begin simde/x86/sse3.h :: */
-/* SPDX-License-Identifier: MIT
- * Copyright: 2017-2020 Evan Nemerson
- */
-
-#if !defined(SIMDE_X86_SSE3_H)
-#define SIMDE_X86_SSE3_H
-
-/* :: Begin simde/x86/sse2.h :: */
-/* SPDX-License-Identifier: MIT
- * Copyright:
- *   2017-2020 Evan Nemerson
- *   2015-2017 John W.
Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - * 2017 Hasindu Gamaarachchi - * 2018 Jeff Daily - */ - -#if !defined(SIMDE_X86_SSE2_H) -#define SIMDE_X86_SSE2_H - -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ -/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ -/* 589c7d599ae2213823acc4334a3ae8ef8caefe18 */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #endif - #if defined(SIMDE_FLOAT16_VECTOR) - SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - #endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; - #endif - SIMDE_ALIGN_TO_16 simde_float16 f16[8]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128i n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - #if defined(__ARM_FP16_FORMAT_IEEE) - SIMDE_ALIGN_TO_16 float16x8_t neon_f16; - #endif - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - 
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128i_private; - -typedef union { - #if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - #else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; - #endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - - #if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128d n; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; - #endif - #elif defined(SIMDE_MIPS_MSA_NATIVE) - v16i8 msa_i8; - v8i16 msa_i16; - v4i32 msa_i32; - v2i64 
msa_i64; - v16u8 msa_u8; - v8u16 msa_u16; - v4u32 msa_u32; - v2u64 msa_u64; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; - #else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; - #endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; - #endif - #endif -} simde__m128d_private; - -#if defined(SIMDE_X86_SSE2_NATIVE) - typedef __m128i simde__m128i; - typedef __m128d simde__m128d; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - typedef int64x2_t simde__m128i; -# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - typedef float64x2_t simde__m128d; -# elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -# else - typedef simde__m128d_private simde__m128d; -# endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - typedef v128_t simde__m128i; - typedef v128_t simde__m128d; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; - #else - typedef simde__m128d_private simde__m128d; - #endif -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - typedef simde__m128i_private simde__m128i; - typedef simde__m128d_private simde__m128d; -#endif - -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - typedef simde__m128i __m128i; - typedef simde__m128d __m128d; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); 
-HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde__m128i_from_private(simde__m128i_private v) { - simde__m128i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i_private -simde__m128i_to_private(simde__m128i v) { - simde__m128i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde__m128d_from_private(simde__m128d_private v) { - simde__m128d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d_private -simde__m128d_to_private(simde__m128d v) { - simde__m128d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, 
f32) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) - #endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) - #if defined(SIMDE_BUG_GCC_95782) - SIMDE_FUNCTION_ATTRIBUTES - SIMDE_POWER_ALTIVEC_VECTOR(double) - simde__m128d_to_altivec_f64(simde__m128d value) { - simde__m128d_private r_ = simde__m128d_to_private(value); - return r_.altivec_f64; - } - - SIMDE_FUNCTION_ATTRIBUTES - simde__m128d - simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { - simde__m128d_private r_; - r_.altivec_f64 = value; - return simde__m128d_from_private(r_); - } - #else - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) - #endif - #endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); - SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_pd(e1, e0); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make(e0, e1); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; - r_.neon_f64 = vld1q_f64(data); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_pd(a); - #else - simde__m128d_private r_; - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_set1_pd(a) simde_mm_set1_pd(a) - #define _mm_set_pd1(a) simde_mm_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_abs_pd(simde__m128d a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - simde_float64 mask_; - uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); - simde_memcpy(&mask_, &u64_, sizeof(u64_)); - return _mm_and_pd(_mm_set1_pd(mask_), a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vabsq_f64(a_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_abs(a_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_not_pd(simde__m128d a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castpd_si128(a); - return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { - /* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. 
*/ - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - mask_ = simde__m128d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 + b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] + b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_add_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] + b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_move_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(HEDLEY_IBM_VERSION) - r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); - #else - r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); - #else - r_.f64[0] = b_.f64[0]; - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_broadcastlow_pd(simde__m128d a) { - /* This function broadcasts the first element in the input vector to - * all lanes. It is used to avoid generating spurious exceptions in - * *_sd functions since there may be garbage in the upper lanes. 
*/ - - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[0]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_add_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] + b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_add_si64 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_si64(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); - #else - r_.i64[0] = a_.i64[0] + b_.i64[0]; - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_and_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_and_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_and_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_and_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_andnot_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = ~a_.u64[i] & b_.u64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_andnot_si128(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_xor_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - 
#endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) - uint16_t wa SIMDE_VECTOR(32); - uint16_t wb SIMDE_VECTOR(32); - uint16_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) - uint32_t wa SIMDE_VECTOR(32); - uint32_t wb SIMDE_VECTOR(32); - uint32_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setzero_si128 (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_si128(); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vdupq_n_s32(0); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT) - r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = 0; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} 
-#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_si128() (simde_mm_setzero_si128()) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_bslli_si128 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & ~15))) { - return simde_mm_setzero_si128(); - } - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) - r_.altivec_i8 = - #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - vec_slo - #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ - vec_sro - #endif - (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); - #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.u128[0] = a_.u128[0] << (imm8 * 8); - #else - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i - imm8]; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) - #define simde_mm_bslli_si128(a, imm8) \ - simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ - simde__m128i_from_wasm_v128( \ - wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ - simde__m128i_to_wasm_v128((a)), \ - ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ - ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_; \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.i8 = \ - SIMDE_SHUFFLE_VECTOR_(8, 16, \ - simde_tmp_z_.i8, \ - (simde_tmp_a_).i8, \ - HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#endif -#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) - #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_bsrli_si128 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & ~15))) { - return simde_mm_setzero_si128(); - } - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) - r_.altivec_i8 = - #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - vec_sro - #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ - vec_slo - #endif - (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); - #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int e = HEDLEY_STATIC_CAST(int, i) + imm8; - r_.i8[i] = (e < 16) ? a_.i8[e] : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) - #define simde_mm_bsrli_si128(a, imm8) \ - simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.wasm_v128 = \ - wasm_i8x16_shuffle( \ - simde_tmp_z_.wasm_v128, \ - simde_tmp_a_.wasm_v128, \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ - } else { \ - simde_tmp_r_.i8 = \ - SIMDE_SHUFFLE_VECTOR_(8, 16, \ - simde_tmp_z_.i8, \ - (simde_tmp_a_).i8, \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ - } \ - simde__m128i_from_private(simde_tmp_r_); })) -#endif -#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) - #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_clflush (void const* p) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_clflush(p); - #else - (void) p; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_clflush(p) simde_mm_clflush(p) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comieq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] == b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comige_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comige_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] >= b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comigt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] > b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comile_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comile_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] <= b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comilt_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] < b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comineq_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), 
- b_ = simde__m128d_to_private(b); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); - #else - return a_.f64[0] != b_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { - simde__m128d_private - r_, - dest_ = simde__m128d_to_private(dest), - src_ = simde__m128d_to_private(src); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); - #else - simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); - uint64_t u64_nz; - simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); - uint64x2_t sign_pos = vdupq_n_u64(u64_nz); - #endif - r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) - r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); - #else - r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); - #endif - #elif defined(simde_math_copysign) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); - } - #else - simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); - return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); - #endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { - return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_castpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_ps(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f32_f64(a); - #else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castpd_si128 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_si128(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_s64_f64(a); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_castps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_pd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_f32(a); - #else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castps_pd(a) simde_mm_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_castps_si128 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_si128(a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); - #else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif 
-} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castps_si128(a) simde_mm_castps_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_castsi128_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_pd(a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_s64(a); - #else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_castsi128_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_ps(a); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); - #else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = (a_.i16 == b_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_MIPS_MSA_NATIVE) - r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpgt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpge_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpge_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpngt_pd(a, b); - #else - return simde_mm_cmple_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpngt_sd(a, b); - #else - return simde_mm_cmple_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnge_pd(a, b); - #else - return simde_mm_cmplt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpnge_sd(a, b); - #else - return simde_mm_cmplt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_pd(a, b); - #else - return simde_mm_cmpge_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_sd(a, b); - #else - return simde_mm_cmpge_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnle_pd(a, b); - #else - return simde_mm_cmpgt_pd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return 
_mm_cmpnle_sd(a, b); - #else - return simde_mm_cmpgt_sd(a, b); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vandq_u64(ceqaa, ceqbb); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm_cvtsd_f64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cvtsd_f64(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); - #else - return a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), - wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_isnan) - r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_pd(a); - #else - simde__m128d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtepi32_ps (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_ps(a); - #else - simde__m128_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH - #if HEDLEY_HAS_WARNING("-Wc11-extensions") - #pragma clang diagnostic ignored "-Wc11-extensions" - #endif - r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = (simde_float32) a_.i32[i]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvtpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = simde_math_round(a_.f64[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) - return _mm_cvtpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvtpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtpd_ps (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) - float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; - r_.f32 = - __builtin_shufflevector( - __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, - 0, 1, 2, 3 - ); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); - r_.f32[2] = SIMDE_FLOAT32_C(0.0); - r_.f32[3] = SIMDE_FLOAT32_C(0.0); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtpi32_pd (simde__m64 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_pd(a); - #else - simde__m128d_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (simde_float64) a_.i32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_; - - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) - a_ = simde__m128_to_private(a); - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); - HEDLEY_DIAGNOSTIC_POP - #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - a_ = simde__m128_to_private(a); - r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - #else - a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_pd(a); - #else - simde__m128d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f32[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 v = simde_math_round(a_.f64[0]); - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
- SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsd_si64 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsd_si64x(a); - #else - return _mm_cvtsd_si64(a); - #endif - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); - #endif -} -#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) - #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_ss(a, b); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); - #else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); - - SIMDE_VECTORIZE - for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i]; - } - #endif - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t -simde_x_mm_cvtsi128_si16 (simde__m128i a) { - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s16(a_.neon_i16, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i16, 0); - #else - return a_.i16[0]; - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvtsi128_si32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi128_si32(a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s32(a_.neon_i32, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - return vec_extract(a_.altivec_i32, 0); - #else - return a_.i32[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm_cvtsi128_si64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if defined(__PGI) - return _mm_cvtsi128_si64x(a); - #else - return _mm_cvtsi128_si64(a); - #endif - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); - #endif - return a_.i64[0]; - #endif -} -#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) - #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_sd(a, b); - #else - simde__m128d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.i64[1] = a_.i64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_cvtsi16_si128 (int16_t a) { - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); - #else - r_.i16[0] = a; - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - r_.i16[4] = 0; - r_.i16[5] = 0; - r_.i16[6] = 0; - r_.i16[7] = 0; - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi32_si128 (int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_si128(a); - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); - #else - r_.i32[0] = a; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_sd(a, b); - #else - return _mm_cvtsi64x_sd(a, b); - #endif - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); - #else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.f64[1] = a_.f64[1]; - #endif - - return simde__m128d_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) - #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsi64_si128 (int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(__PGI) - return _mm_cvtsi64_si128(a); - #else - return _mm_cvtsi64x_si128(a); - #endif - #else - simde__m128i_private r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - 
r_.wasm_v128 = wasm_i64x2_make(a, 0); - #else - r_.i64[0] = a; - r_.i64[1] = 0; - #endif - - return simde__m128i_from_private(r_); - #endif -} -#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) - #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtss_sd(a, b); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); - return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); - - return simde__m128d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_cvttpd_pi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvttpd_pi32(a); - #else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float64 v = a_.f64[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttpd_epi32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttpd_epi32(a); - #else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvttpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvttps_epi32 (simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttps_epi32(a); - #else - simde__m128i_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - /* Values below INT32_MIN saturate anyways, so we don't need to - * test for that. 
*/ - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = - vandq_u32( - vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), - vceqq_f32(a_.neon_f32, a_.neon_f32) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); - #endif - - r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); - #endif - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) - v128_t valid_input = - wasm_v128_and( - wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), - wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) - ); - #elif !defined(SIMDE_FAST_CONVERSION_RANGE) - v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); - #elif !defined(SIMDE_FAST_NANS) - v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); - #endif - - r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); - #endif - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_ARCH_POWER) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); - - #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; - - __typeof__(r_.i32) valid_input = - HEDLEY_REINTERPRET_CAST( - __typeof__(r_.i32), - (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) - ); - #elif !defined(SIMDE_FAST_NANS) - __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); - #endif - - __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; - r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); - #endif - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - simde_float32 v = a_.f32[i]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); - #else - r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? - SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; - #endif - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm_cvttsd_si32 (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); - #else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
[... remainder of this deletion hunk elided: the change removes the vendored SIMDE (SIMD Everywhere) SSE2 portability header in full. The deleted code consists of the simde_mm_* reimplementations of the x86 SSE2 intrinsics visible in this span (cvttsd_si32/si64, div_pd/sd, extract/insert_epi16, the load/loadu family, loadh/loadl/loadr, madd_epi16, maskmoveu_si128, min/max for epi16/epu8/pd/sd, move_epi64, movemask_epi8/pd, movepi64_pi64/movpi64_epi64, mul_epu32/pd/sd/su32, mulhi/mullo_epi16, or_pd/or_si128, packs/packus, pause, sad_epu8, and the set/set1/setr/set_epu* constructors), each dispatching to a native x86, NEON, AltiVec, WASM SIMD, or scalar implementation depending on the target. With the SDPR sampler ported to Armadillo (and -DSIMDE_ENABLE_NATIVE_ALIASES dropped from Makevars.in), none of these shims are referenced, so the header is deleted. ...]
#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi32(e3, e2, e1, e0); - #else - return simde_mm_set_epi32(e0, e1, e2, e3); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_epi64(e1, e0); - #else - return simde_mm_set_epi64(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_pd(e1, e0); - #else - return simde_mm_set_pd(e0, e1); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_setzero_pd (void) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_pd(); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); - #else - return simde_mm_castsi128_pd(simde_mm_setzero_si128()); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_setzero_pd() simde_mm_setzero_pd() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_undefined_pd (void) { - simde__m128d_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_pd(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); - #endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_pd() simde_mm_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_undefined_si128 (void) { - simde__m128i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_si128(); - #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_undefined_si128() (simde_mm_undefined_si128()) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_x_mm_setone_pd (void) { - return simde_mm_castps_pd(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_setone_si128 (void) { - return simde_mm_castps_si128(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = 
simde__m128i_to_private(a); \ - simde__m128i_from_wasm_v128( \ - wasm_i32x4_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3)); })) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shuffle_epi32(a, imm8) \ - (__extension__ ({ \ - const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ - int32x4_t simde_mm_shuffle_epi32_r_; \ - simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ - simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ - vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 16, \ - (simde_tmp_a_).i32, \ - (simde_tmp_a_).i32, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; - r_.f64[1] = ((imm8 & 2) == 0) ? 
b_.f64[0] : b_.f64[1]; - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ - simde__m128d_from_private((simde__m128d_private) { .f64 = \ - SIMDE_SHUFFLE_VECTOR_(64, 16, \ - simde__m128d_to_private(a).f64, \ - simde__m128d_to_private(b).f64, \ - (((imm8) ) & 1), \ - (((imm8) >> 1) & 1) + 2) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[i]; - } - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflehi_epi16(a, imm8) \ - (__extension__ ({ \ - int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ - simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ - simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ - wasm_i16x8_shuffle( \ - (simde_tmp_a_).wasm_v128, \ - (simde_tmp_a_).wasm_v128, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = 
simde__m128i_to_private(a); - - for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { - r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; - } - SIMDE_VECTORIZE - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_shufflelo_epi16(a, imm8) \ - simde__m128i_from_wasm_v128( \ - wasm_i16x8_shuffle( \ - simde__m128i_to_wasm_v128((a)), \ - wasm_i16x8_splat(0), \ - (((imm8) & 0x03) ), \ - (((imm8) & 0x0c) >> 2), \ - (((imm8) & 0x30) >> 4), \ - (((imm8) & 0xc0) >> 6), \ - 4, 5, 6, 7)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm_shufflelo_epi16(a, imm8) \ - (__extension__({ \ - int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ - int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ - simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ - simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ - })) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ - const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 16, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), \ - 4, 5, 6, 7) }); })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 15) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = (a_.u16 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? 
wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 31) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = (a_.u32 << count_.u64[0]); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 63) - return simde_mm_setzero_si128(); - - const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] << s; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_pd (simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsqrtq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = vec_sqrt(a_.altivec_f64); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_sqrt) - r_.f64[0] = simde_math_sqrt(b_.f64[0]); - r_.f64[1] = a_.f64[1]; - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi64(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - #if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~15) ? 15 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srai_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* MSVC requires a range of (0, 255). */ - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~31) ? 
31 : imm8; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sra_epi16(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) - return _mm_sra_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - (((imm8) <= 0) ? 
\ - (a) : \ - simde__m128i_from_neon_i16( \ - ((imm8) > 15) ? \ - vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ - vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i32( \ - ((imm8) > 31) ? \ - vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ - vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_slli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sl(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_slli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 63))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_i64( \ - ((imm8) > 63) ? \ - vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ - vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_slli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi16 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u16( \ - ((imm8) > 15) ? \ - vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ - vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi32 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u32( \ - ((imm8) > 31) ? \ - vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ - vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - #define simde_mm_srli_epi32(a, imm8) \ - (__extension__ ({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sr(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srli_epi64 (simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) - return simde_mm_setzero_si128(); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); - #else - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) - r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) - #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - (((imm8) <= 0) ? \ - (a) : \ - simde__m128i_from_neon_u64( \ - ((imm8) > 63) ? \ - vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ - vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - #define simde_mm_srli_epi64(a, imm8) \ - ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store1_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); - #else - mem_addr[0] = a_.f64[0]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) - #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_sd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); - simde_memcpy(mem_addr, &v, sizeof(v)); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); - simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); - #else - simde_float64 v = a_.f64[0]; - simde_memcpy(mem_addr, &v, sizeof(simde_float64)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); - #else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void - simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeh_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) - *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); - #else - *mem_addr = a_.f64[1]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde__m128i_private a_ = simde__m128i_to_private(a); - int64_t tmp; - - /* memcpy to prevent aliasing, tmp because we can't take the - * address of a vector element. */ - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - tmp = vgetq_lane_s64(a_.neon_i64, 0); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - #if defined(SIMDE_BUG_GCC_95227) - (void) a_; - #endif - tmp = vec_extract(a_.altivec_i64, 0); - #else - tmp = a_.i64[0]; - #endif - - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_pd(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 tmp; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - tmp = vgetq_lane_f64(a_.neon_f64, 0); - #else - tmp = a_.f64[0]; - #endif - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storer_pd(mem_addr, a); - #else - simde__m128d_private a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); - #else - mem_addr[0] = a_.f64[1]; - mem_addr[1] = a_.f64[0]; - #endif - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_pd(mem_addr, a); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { - #if 
defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si16(mem_addr, a); - #else - int16_t val = simde_x_mm_cvtsi128_si16(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si32(mem_addr, a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); - #else - int32_t val = simde_mm_cvtsi128_si32(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ - HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(20,21,1)) - _mm_storeu_si64(mem_addr, a); - #else - int64_t val = simde_mm_cvtsi128_si64(a); - simde_memcpy(mem_addr, &val, sizeof(val)); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_pd(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_pd(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && ( \ - defined(SIMDE_VECTOR_SUBSCRIPT) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ - defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) - __builtin_nontemporal_store(a, SIMDE_ALIGN_CAST(__typeof__(a)*, mem_addr)); - #else - simde_mm_store_si128(mem_addr, a); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-void -simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_si32(mem_addr, a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_s32(mem_addr, vdupq_n_s32(a), 0); - #else - *mem_addr = a; - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { - #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) - _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); - #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) - __builtin_nontemporal_store(a, mem_addr); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_s64(mem_addr, vdup_n_s64(a)); - #else - *mem_addr = a; - #endif -} -#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) - #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); - #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_pd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_pd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_sub_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_sd(a, b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] - b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) - #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
[... elided: this span of the diff deletes, verbatim, part of the vendored SIMDe compatibility headers — the tail of simde/x86/sse2.h (portable fallbacks for _mm_sub_si64, _mm_subs_ep*, _mm_ucomi*_sd, _mm_lfence/_mm_mfence, _mm_unpackhi_*/_mm_unpacklo_*, _mm_xor_si128 and the deinterleave helpers), the body of simde/x86/sse3.h (_mm_addsub_*, _mm_hadd_*/_mm_hsub_*, _mm_lddqu_si128, _mm_loaddup_pd, _mm_movedup_pd, _mm_movehdup_ps/_mm_moveldup_ps), and the opening of simde/x86/ssse3.h (_mm_abs_*, _mm_alignr_*, _mm_shuffle_epi8/_mm_shuffle_pi8, _mm_hadd*/_mm_hadds*/_mm_hsub*/_mm_hsubs*, _mm_maddubs_*, _mm_mulhrs_*) — removed along with the rest of the vendored SIMDe tree ...]
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Multiply */ - int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); - - /* Rounding narrowing shift right - * narrow = (int16_t)((mul + 16384) >> 15); */ - int16x4_t narrow = vrshrn_n_s32(mul, 15); - - /* Join together */ - r_.neon_i16 = narrow; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); - uint8x16_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s8(b_.neon_i8); - #else - bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); - #endif - bnz_mask = vmvnq_u8(bnz_mask); - - r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); - simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); - uint16x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s16(b_.neon_i16); - #else - bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); - #endif - bnz_mask = vmvnq_u16(bnz_mask); - - r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); - simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_sign_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); - uint32x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqzq_s32(b_.neon_i32); - #else - bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); - #endif - bnz_mask = vmvnq_u32(bnz_mask); - - r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); - simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); - r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi8(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); - uint8x8_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s8(b_.neon_i8); - #else - bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); - #endif - bnz_mask = vmvn_u8(bnz_mask); - - r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi16(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); - uint16x4_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s16(b_.neon_i16); - #else - bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); - #endif - bnz_mask = vmvn_u16(bnz_mask); - - r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 -simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { - #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sign_pi32(a, b); - #else - simde__m64_private - r_, - a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); - uint32x2_t bnz_mask; - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - bnz_mask = vceqz_s32(b_.neon_i32); - #else - bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); - #endif - bnz_mask = vmvn_u32(bnz_mask); - - r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); - #else - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); - } - #endif - - return simde__m64_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) -# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ -/* :: End simde/x86/ssse3.h :: */ - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) -# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_epi16(a, b, imm8) \ - (__extension__ ({ \ - simde__m128i_private \ - simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ - simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ - simde_mm_blend_epi16_r_; \ - \ - simde_mm_blend_epi16_r_.i16 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 16, 16, \ - simde_mm_blend_epi16_a_.i16, \ - simde_mm_blend_epi16_b_.i16, \ - ((imm8) & (1 << 0)) ? 8 : 0, \ - ((imm8) & (1 << 1)) ? 9 : 1, \ - ((imm8) & (1 << 2)) ? 10 : 2, \ - ((imm8) & (1 << 3)) ? 11 : 3, \ - ((imm8) & (1 << 4)) ? 12 : 4, \ - ((imm8) & (1 << 5)) ? 13 : 5, \ - ((imm8) & (1 << 6)) ? 14 : 6, \ - ((imm8) & (1 << 7)) ? 15 : 7 \ - ); \ - \ - simde__m128i_from_private(simde_mm_blend_epi16_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_epi16 - #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; - } - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_pd(a, b, imm8) \ - (__extension__ ({ \ - simde__m128d_private \ - simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ - simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ - simde_mm_blend_pd_r_; \ - \ - simde_mm_blend_pd_r_.f64 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 64, 16, \ - simde_mm_blend_pd_a_.f64, \ - simde_mm_blend_pd_b_.f64, \ - ((imm8) & (1 << 0)) ? 2 : 0, \ - ((imm8) & (1 << 1)) ? 3 : 1 \ - ); \ - \ - simde__m128d_from_private(simde_mm_blend_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_pd - #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) - #define simde_mm_blend_ps(a, b, imm8) \ - (__extension__ ({ \ - simde__m128_private \ - simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ - simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ - simde_mm_blend_ps_r_; \ - \ - simde_mm_blend_ps_r_.f32 = \ - SIMDE_SHUFFLE_VECTOR_( \ - 32, 16, \ - simde_mm_blend_ps_a_.f32, \ - simde_mm_blend_ps_b_.f32, \ - ((imm8) & (1 << 0)) ? 4 : 0, \ - ((imm8) & (1 << 1)) ? 5 : 1, \ - ((imm8) & (1 << 2)) ? 6 : 2, \ - ((imm8) & (1 << 3)) ? 
7 : 3 \ - ); \ - \ - simde__m128_from_private(simde_mm_blend_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_ps - #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_epi8(a, b, mask); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); - return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Use a signed shift right to create a mask with the sign bit */ - mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); - r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); - #else - mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; - #endif - - r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int8_t m = mask_.i8[i] >> 7; - r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_epi8 - #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE2_NATIVE) - mask = simde_mm_srai_epi16(mask, 15); - return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); - r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; - mask_.i16 = mask_.i16 < z; - #else - mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; - #endif - - r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int16_t m = mask_.i16[i] >> 15; - r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; - mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); - #else - mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; - #endif - - r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - int32_t m = mask_.i32[i] >> 31; - r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - mask_ = simde__m128i_to_private(mask); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); - r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); - #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); - r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - #if defined(HEDLEY_INTEL_VERSION_CHECK) - __typeof__(mask_.i64) z = { 0, 0 }; - mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); - #else - mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; - #endif - - r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - int64_t m = mask_.i64[i] >> 63; - r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_pd - #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); - return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); - #else - return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_blendv_ps - #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_round_pd (simde__m128d a, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - /* For architectures which lack a current direction SIMD instruction. */ - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; - #endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndiq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_nearbyint) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_nearbyint(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndaq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); - #elif defined(simde_math_roundeven) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_roundeven(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndmq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_floor(a_.f64[i]); - } - #endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndpq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); - #elif defined(simde_math_ceil) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_ceil(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - #endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vrndq_f64(a_.neon_f64); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_trunc(a_.f64[i]); - } - #endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_round_pd - #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_pd (simde__m128d a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); - #endif - return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_pd - #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ps (simde__m128 a) { - #if defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); - #endif - return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ps - #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_sd(a, b); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_sd - #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_ceil_ss(a, 
b); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) - return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); - #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_ceilf) - r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_ceil_ss - #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cmpeq_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); - #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ - uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); - uint32x4_t swapped = vrev64q_u32(cmp); - r_.neon_u32 = vandq_u32(cmp, swapped); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cmpeq_epi64 - #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_i16 = s16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, 0, -1, 1, -1, 2, -1, 3, - -1, 4, -1, 5, -1, 6, -1, 7)); - r_.i16 >>= 8; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi16 - #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) 
- __m128i tmp = _mm_unpacklo_epi8(a, a); - tmp = _mm_unpacklo_epi16(tmp, tmp); - return _mm_srai_epi32(tmp, 24); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ - r_.neon_i32 = s32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, 0, -1, -1, -1, 1, - -1, -1, -1, 2, -1, -1, -1, 3)); - r_.i32 >>= 24; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi32 - #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi8_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ - int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); - v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); - r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - /* Disabled on x86 due to lack of 64-bit arithmetic shift until - * until AVX-512 (at which point we would be using the native - * _mm_cvtepi_epi64 anyways). 
*/ - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, - -1, -1, -1, -1, -1, -1, -1, 0, - -1, -1, -1, -1, -1, -1, -1, 1)); - r_.i64 >>= 56; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi8_epi64 - #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - r_.neon_u16 = u16x8; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 16, 1, 17, 2, 18, 3, 19, - 4, 20, 5, 21, 6, 22, 7, 23)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi16 - #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi32(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ - r_.neon_u32 = u32x4; - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 
18, 19, 1, 21, 22, 23, - 2, 25, 26, 27, 3, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi32 - #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu8_epi64(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - __m128i s = _mm_set_epi8( - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), - HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); - return _mm_shuffle_epi8(a, s); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.i8) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, - 0, 17, 18, 19, 20, 21, 22, 23, - 1, 25, 26, 27, 28, 29, 30, 31)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u8[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu8_epi64 - #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); - r_.i32 >>= 16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi32 - #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 1, 11, 2, 13, 3, 15)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi32 - #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu16_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i z = _mm_setzero_si128(); - return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ - uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ - r_.neon_u64 = u64x2; - #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u16) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, - 0, 9, 10, 11, - 1, 13, 14, 15)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu16_epi64 - #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi16_epi64(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ - int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ - int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ - r_.neon_i64 = s64x2; - #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, - 8, 9, 10, 0, - 12, 13, 14, 1)); - r_.i64 >>= 48; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - 
#endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi16_epi64 - #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepi32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - __m128i tmp = _mm_shuffle_epi32(a, 0x50); - tmp = _mm_srai_epi32(tmp, 31); - tmp = _mm_shuffle_epi32(tmp, 0xed); - return _mm_unpacklo_epi32(a, tmp); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); - #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); - r_.i64 >>= 32; - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepi32_epi64 - #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtepu32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_cvtepu32_epi64(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, _mm_setzero_si128()); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); - #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(r_.u32) z = { 0, }; - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); - #elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtepu32_epi64 - #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); - - switch (imm8) { - case 0xff: - r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); - break; - case 0x13: - r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); - break; - default: - { /* imm8 is a compile-time constant, so this all becomes just a load */ - uint64_t mask_data[] = { - (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), - }; - r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); - } - - r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); - - { - uint64_t mask_data[] = { - (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), - (imm8 & 2) ? 
[... deleted: remainder of the vendored SIMDe (SIMD Everywhere) compatibility header.
 The removed span covers the rest of simde/x86/sse4.1.h (_mm_dp_pd/_mm_dp_ps, the
 extract/insert lane helpers for epi8/epi32/epi64/ps, floor_pd/ps/sd/ss, max/min for
 epi8/epi32/epu16/epu32, minpos_epu16, mpsadbw_epu8, mul_epi32, mullo_epi32,
 packus_epi32, round_sd/ss, stream_load_si128, the test_all_ones / test_all_zeros /
 test_mix_ones_zeros and testc/testnzc/testz predicates, and their native-alias
 #defines); all of simde/x86/sse4.2.h (the _SIDD_* constants, cmpestrs/cmpestrz,
 cmpgt_epi64, cmpistrs/cmpistrz with their 8- and 16-bit helpers, and
 crc32_u8/u16/u32/u64); and the opening of the AVX section (the simde__m256,
 simde__m256i and simde__m256d private unions and typedefs, their size and alignment
 static asserts, the to/from-private converters, the SIMDE_CMP_* and _CMP_* constants,
 the cast and setzero functions, and the x_mm256_not / x_mm256_select helpers) ...]
- * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_pd(a, b, mask); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - mask_ = simde__m256d_to_private(mask); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); - r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } - #endif - - return simde__m256d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_setone_si256 (void) { - simde__m256i_private r_; - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - __typeof__(r_.i32f) rv = { 0, }; - r_.i32f = ~rv; -#elif defined(SIMDE_X86_AVX2_NATIVE) - __m256i t = _mm256_setzero_si256(); - r_.n = _mm256_cmpeq_epi32(t, t); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - } -#endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_setone_ps (void) { - return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_setone_pd (void) { - return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, - int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, - int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16, - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi8( - e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi8( - e31, e30, e29, e28, e27, e26, e25, e24, - e23, e22, e21, e20, e19, e18, e17, e16); - #else - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - r_.i8[16] = e16; - r_.i8[17] = e17; - r_.i8[18] = e18; - r_.i8[19] = e19; - r_.i8[20] = e20; - r_.i8[21] = e21; - r_.i8[22] = e22; - r_.i8[23] = e23; - r_.i8[24] = e24; - r_.i8[25] = e25; - r_.i8[26] = e26; - r_.i8[27] = e27; - r_.i8[28] = e28; - r_.i8[29] = e29; - r_.i8[30] = e30; - r_.i8[31] = e31; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi8 - #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ 
- simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, - int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, - e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); - #else - r_.i16[ 0] = e0; - r_.i16[ 1] = e1; - r_.i16[ 2] = e2; - r_.i16[ 3] = e3; - r_.i16[ 4] = e4; - r_.i16[ 5] = e5; - r_.i16[ 6] = e6; - r_.i16[ 7] = e7; - r_.i16[ 8] = e8; - r_.i16[ 9] = e9; - r_.i16[10] = e10; - r_.i16[11] = e11; - r_.i16[12] = e12; - r_.i16[13] = e13; - r_.i16[14] = e14; - r_.i16[15] = e15; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi16 - #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, - int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); - r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); - #else - r_.i32[ 0] = e0; - r_.i32[ 1] = e1; - r_.i32[ 2] = e2; - r_.i32[ 3] = e3; - r_.i32[ 4] = e4; - r_.i32[ 5] = e5; - r_.i32[ 6] = e6; - r_.i32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi32 - #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi64x(e3, e2, e1, e0); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi64x(e1, e0); - r_.m128i[1] = simde_mm_set_epi64x(e3, e2); - #else - r_.i64[0] = e0; - r_.i64[1] = e1; - r_.i64[2] = e2; - r_.i64[3] = e3; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_epi64x - #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, - uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, - uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, - uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, - uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, - uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, - uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m256i_private r_; - - r_.u8[ 0] = e0; - 
r_.u8[ 1] = e1; - r_.u8[ 2] = e2; - r_.u8[ 3] = e3; - r_.u8[ 4] = e4; - r_.u8[ 5] = e5; - r_.u8[ 6] = e6; - r_.u8[ 7] = e7; - r_.u8[ 8] = e8; - r_.u8[ 9] = e9; - r_.u8[10] = e10; - r_.u8[11] = e11; - r_.u8[12] = e12; - r_.u8[13] = e13; - r_.u8[14] = e14; - r_.u8[15] = e15; - r_.u8[16] = e16; - r_.u8[17] = e17; - r_.u8[18] = e18; - r_.u8[19] = e19; - r_.u8[20] = e20; - r_.u8[20] = e20; - r_.u8[21] = e21; - r_.u8[22] = e22; - r_.u8[23] = e23; - r_.u8[24] = e24; - r_.u8[25] = e25; - r_.u8[26] = e26; - r_.u8[27] = e27; - r_.u8[28] = e28; - r_.u8[29] = e29; - r_.u8[30] = e30; - r_.u8[31] = e31; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, - uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, - uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, - uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m256i_private r_; - - r_.u16[ 0] = e0; - r_.u16[ 1] = e1; - r_.u16[ 2] = e2; - r_.u16[ 3] = e3; - r_.u16[ 4] = e4; - r_.u16[ 5] = e5; - r_.u16[ 6] = e6; - r_.u16[ 7] = e7; - r_.u16[ 8] = e8; - r_.u16[ 9] = e9; - r_.u16[10] = e10; - r_.u16[11] = e11; - r_.u16[12] = e12; - r_.u16[13] = e13; - r_.u16[14] = e14; - r_.u16[15] = e15; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, - uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), - HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); - r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); - #else - r_.u32[ 0] = e0; - r_.u32[ 1] = e1; - r_.u32[ 2] = e2; - r_.u32[ 3] = e3; - r_.u32[ 4] = e4; - r_.u32[ 5] = e5; - r_.u32[ 6] = e6; - r_.u32[ 7] = e7; - #endif - - return simde__m256i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { - simde__m256i_private r_; - - r_.u64[0] = e0; - r_.u64[1] = e1; - r_.u64[2] = e2; - r_.u64[3] = e3; - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); - #else - simde__m256_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); - r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); - #else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; - r_.f32[4] = e4; - r_.f32[5] = e5; - r_.f32[6] = e6; - r_.f32[7] = e7; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_ps - #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ - 
simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set_pd(e3, e2, e1, e0); - #else - simde__m256d_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_set_pd(e1, e0); - r_.m128d[1] = simde_mm_set_pd(e3, e2); - #else - r_.f64[0] = e0; - r_.f64[1] = e1; - r_.f64[2] = e2; - r_.f64[3] = e3; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_pd - #define _mm256_set_pd(e3, e2, e1, e0) \ - simde_mm256_set_pd(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); - #else - simde__m256_private r_; - simde__m128_private - e1_ = simde__m128_to_private(e1), - e0_ = simde__m128_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128_private[0] = e0_; - r_.m128_private[1] = e1_; - #elif defined(SIMDE_HAVE_INT128_) - r_.i128[0] = e0_.i128[0]; - r_.i128[1] = e1_.i128[0]; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128 - #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); - #else - simde__m256d_private r_; - simde__m128d_private - e1_ = simde__m128d_to_private(e1), - e0_ = simde__m128d_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d_private[0] = e0_; - r_.m128d_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128d - #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); - #else - simde__m256i_private r_; - simde__m128i_private - e1_ = simde__m128i_to_private(e1), - e0_ = simde__m128i_to_private(e0); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i_private[0] = e0_; - r_.m128i_private[1] = e1_; - #else - r_.i64[0] = e0_.i64[0]; - r_.i64[1] = e0_.i64[1]; - r_.i64[2] = e1_.i64[0]; - r_.i64[3] = e1_.i64[1]; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set_m128i - #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi8(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi8(a); - r_.m128i[1] = simde_mm_set1_epi8(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - #endif - - return 
simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi8 - #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi16 (int16_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi16(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi16(a); - r_.m128i[1] = simde_mm_set1_epi16(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi16 - #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi32 (int32_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi32(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi32(a); - r_.m128i[1] = simde_mm_set1_epi32(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi32 - #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_set1_epi64x (int64_t a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_epi64x(a); - #else - simde__m256i_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_set1_epi64x(a); - r_.m128i[1] = simde_mm_set1_epi64x(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_epi64x - #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_set1_ps (simde_float32 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_ps(a); - #else - simde__m256_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_set1_ps(a); - r_.m128[1] = simde_mm_set1_ps(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_ps - #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_set1_pd(a); - #else - simde__m256d_private r_; - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_set1_pd(a); - r_.m128d[1] = simde_mm_set1_pd(a); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_set1_pd - #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i16[i] = a_.i16[2 * i]; - r_.i16[i + quarter_point] = b_.i16[2 * i]; - r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; - r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i16[i] = a_.i16[2 * i + 1]; - r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; - r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; - r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i32[i] = a_.i32[2 * i]; - r_.i32[i + quarter_point] = b_.i32[2 * i]; - r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; - r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; - const size_t quarter_point = (sizeof(r_.i32) / 
sizeof(r_.i32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i32[i] = a_.i32[2 * i + 1]; - r_.i32[i + quarter_point] = b_.i32[2 * i + 1]; - r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; - r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f32[i] = a_.f32[2 * i]; - r_.f32[i + quarter_point] = b_.f32[2 * i]; - r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; - r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); - #else - const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; - const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f32[i] = a_.f32[2 * i + 1]; - r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; - r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; - r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f64[i] = a_.f64[2 * i]; - r_.f64[i + quarter_point] = b_.f64[2 * i]; - r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; - r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); - #else - const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; - const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.f64[i] = a_.f64[2 * i + 1]; - r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; - r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; - r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; - } - #endif - - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_abs_ps(simde__m256 a) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_abs_pd(simde__m256d a) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } - return simde__m256d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_add_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_add_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_ps - #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hadd_ps(a, b); - #else - return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_ps - #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_add_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_add_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] + b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_pd - #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_hadd_pd(a, b); - #else - return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_pd - #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_addsub_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { - r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; - r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_addsub_ps - #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_addsub_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { - r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; - r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_addsub_pd - #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_and_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_and_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_ps - #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_and_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_and_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; 
i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_pd - #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_andnot_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_ps - #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_andnot_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_pd - #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ - simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_ps - #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; - } - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_pd(a, b, imm8) \ - simde_mm256_set_m128d( \ - simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ - simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_pd - #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_ps(a, b, mask); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b), - mask_ = simde__m256_to_private(mask); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); - r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blendv_ps - #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_blendv_pd(a, b, mask); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b), - mask_ = simde__m256d_to_private(mask); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); - r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_blendv_pd - #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_pd(mem_addr); - #else - simde__m256d_private r_; - - simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); - r_.m128d[0] = tmp; - r_.m128d[1] = tmp; - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_pd - #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_ps(mem_addr); - #else - simde__m256_private r_; - - simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); - r_.m128[0] = tmp; - r_.m128[1] = tmp; - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_ps - #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcast_sd (simde_float64 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_sd(a); - #else - return simde_mm256_set1_pd(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_sd - #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_broadcast_ss (simde_float32 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm_broadcast_ss(a); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); - #else - return simde_mm_set1_ps(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcast_ss - #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcast_ss (simde_float32 const * a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_broadcast_ss(a); - #else - return simde_mm256_set1_ps(*a); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcast_ss - #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_castpd128_pd256 (simde__m128d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd128_pd256(a); - #else - simde__m256d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - r_.m128d_private[0] = a_; - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd128_pd256 - #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm256_castpd256_pd128 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castpd256_pd128(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - return a_.m128d[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castpd256_pd128 - #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_castps128_ps256 (simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps128_ps256(a); - #else - simde__m256_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - r_.m128_private[0] = a_; - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps128_ps256 - #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_castps256_ps128 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castps256_ps128(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - return a_.m128[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castps256_ps128 - #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_castsi128_si256 (simde__m128i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi128_si256(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - r_.m128i_private[0] = a_; - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi128_si256 - #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_castsi256_si128 (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_castsi256_si128(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_castsi256_si128 - #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_round_ps (simde__m256 a, const int rounding) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyintf) - case SIMDE_MM_FROUND_CUR_DIRECTION: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_roundf) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_floorf) - case SIMDE_MM_FROUND_TO_NEG_INF: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_ceilf) - case SIMDE_MM_FROUND_TO_POS_INF: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } - break; - #endif - - #if defined(simde_math_truncf) - case SIMDE_MM_FROUND_TO_ZERO: - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); - } - - return simde__m256_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256_private \ - 
simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ - simde_mm256_round_ps_a_ = simde__m256_to_private(a); \ - \ - for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \ - simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \ - } \ - \ - simde__m256_from_private(simde_mm256_round_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_round_ps - #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_round_pd (simde__m256d a, const int rounding) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - #if defined(simde_math_nearbyint) - case SIMDE_MM_FROUND_CUR_DIRECTION: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_nearbyint(a_.f64[i]); - } - break; - #endif - - #if defined(simde_math_round) - case SIMDE_MM_FROUND_TO_NEAREST_INT: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_round(a_.f64[i]); - } - break; - #endif - - #if defined(simde_math_floor) - case SIMDE_MM_FROUND_TO_NEG_INF: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_floor(a_.f64[i]); - } - break; - #endif - - #if defined(simde_math_ceil) - case SIMDE_MM_FROUND_TO_POS_INF: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_ceil(a_.f64[i]); - } - break; - #endif - - #if defined(simde_math_trunc) - case SIMDE_MM_FROUND_TO_ZERO: - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_trunc(a_.f64[i]); - } - break; - #endif - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); - } - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) -#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) - #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256d_private \ - simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ - simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \ - \ - for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \ - simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \ - } \ - \ - simde__m256d_from_private(simde_mm256_round_pd_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_round_pd - #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_ceil_pd (simde__m256d a) { - return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_ceil_pd - #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_ceil_ps (simde__m256 a) { - return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef 
_mm256_ceil_ps - #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL - -/* This implementation does not support signaling NaNs (yet?) */ -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - switch (imm8) { - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b)); - break; - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - return simde_mm_cmpeq_pd(a, b); - break; - case SIMDE_CMP_NGE_US: - case SIMDE_CMP_NGE_UQ: - return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b)); - break; - case SIMDE_CMP_LT_OS: - case SIMDE_CMP_LT_OQ: - return simde_mm_cmplt_pd(a, b); - break; - case SIMDE_CMP_NGT_US: - case SIMDE_CMP_NGT_UQ: - return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b)); - break; - case SIMDE_CMP_LE_OS: - case SIMDE_CMP_LE_OQ: - return simde_mm_cmple_pd(a, b); - break; - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - return simde_mm_cmpneq_pd(a, b); - break; - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b)); - break; - case SIMDE_CMP_NLT_US: - case SIMDE_CMP_NLT_UQ: - return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b)); - break; - case SIMDE_CMP_GE_OS: - case SIMDE_CMP_GE_OQ: - return simde_mm_cmpge_pd(a, b); - break; - case SIMDE_CMP_NLE_US: - case SIMDE_CMP_NLE_UQ: - return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b)); - break; - case SIMDE_CMP_GT_OS: - case SIMDE_CMP_GT_OQ: - return simde_mm_cmpgt_pd(a, b); - break; - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - return simde_mm_setzero_pd(); - break; - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - return simde_x_mm_setone_pd(); - break; - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - return simde_mm_cmpunord_pd(a, b); - break; - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - return simde_mm_cmpord_pd(a, b); - break; - } - - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); -} -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \ - simde__m128d simde_mm_cmp_pd_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \ - break; \ - default: \ - simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \ - break; \ - } \ - simde_mm_cmp_pd_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_pd - #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - switch (imm8) { - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b)); - break; - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - return simde_mm_cmpeq_ps(a, b); - break; - case SIMDE_CMP_NGE_US: - case SIMDE_CMP_NGE_UQ: - return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b)); - break; - case SIMDE_CMP_LT_OS: - case SIMDE_CMP_LT_OQ: - return simde_mm_cmplt_ps(a, b); - break; - case SIMDE_CMP_NGT_US: - case SIMDE_CMP_NGT_UQ: - return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b)); - 
break; - case SIMDE_CMP_LE_OS: - case SIMDE_CMP_LE_OQ: - return simde_mm_cmple_ps(a, b); - break; - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - return simde_mm_cmpneq_ps(a, b); - break; - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); - break; - case SIMDE_CMP_NLT_US: - case SIMDE_CMP_NLT_UQ: - return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); - break; - case SIMDE_CMP_GE_OS: - case SIMDE_CMP_GE_OQ: - return simde_mm_cmpge_ps(a, b); - break; - case SIMDE_CMP_NLE_US: - case SIMDE_CMP_NLE_UQ: - return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); - break; - case SIMDE_CMP_GT_OS: - case SIMDE_CMP_GT_OQ: - return simde_mm_cmpgt_ps(a, b); - break; - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - return simde_mm_setzero_ps(); - break; - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - return simde_x_mm_setone_ps(); - break; - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - return simde_mm_cmpunord_ps(a, b); - break; - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - return simde_mm_cmpord_ps(a, b); - break; - } - - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); -} -/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false - * comparisons, but only when AVX-512 is enabled. */ -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ - simde__m128 simde_mm_cmp_ps_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ - break; \ - default: \ - simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ - break; \ - } \ - simde_mm_cmp_ps_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_ps - #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - a_.i64[0] = INT64_C(0); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - a_.i64[0] = ~INT64_C(0); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m128d_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_sd - #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - a_.i32[0] = INT32_C(0); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - a_.i32[0] = ~INT32_C(0); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m128_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_cmp_ss - #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m256d -#if defined(__clang__) && defined(__AVX512DQ__) -simde_mm256_cmp_pd_internal_ -#else -simde_mm256_cmp_pd -#endif -(simde__m256d a, simde__m256d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m256d_from_private(r_); -} -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ - simde__m256d simde_mm256_cmp_pd_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ - break; \ - default: \ - simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ - break; \ - } \ - simde_mm256_cmp_pd_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_pd - #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) -#endif - -SIMDE_HUGE_FUNCTION_ATTRIBUTES -simde__m256 -#if defined(__clang__) && defined(__AVX512DQ__) -simde_mm256_cmp_ps_internal_ -#else -simde_mm256_cmp_ps -#endif -(simde__m256 a, simde__m256 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - case SIMDE_CMP_EQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LT_OQ: - case SIMDE_CMP_LT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_LE_OQ: - case SIMDE_CMP_LE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_UNORD_Q: - case SIMDE_CMP_UNORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_UQ: - case SIMDE_CMP_NEQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NEQ_OQ: - case SIMDE_CMP_NEQ_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLT_UQ: - case SIMDE_CMP_NLT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NLE_UQ: - case SIMDE_CMP_NLE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_ORD_Q: - case SIMDE_CMP_ORD_S: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_EQ_UQ: - case SIMDE_CMP_EQ_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGE_UQ: - case SIMDE_CMP_NGE_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_NGT_UQ: - case SIMDE_CMP_NGT_US: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_FALSE_OQ: - case SIMDE_CMP_FALSE_OS: - r_ = simde__m256_to_private(simde_mm256_setzero_ps()); - break; - - case SIMDE_CMP_GE_OQ: - case SIMDE_CMP_GE_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_GT_OQ: - case SIMDE_CMP_GT_OS: - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - break; - - case SIMDE_CMP_TRUE_UQ: - case SIMDE_CMP_TRUE_US: - r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); - break; - - default: - HEDLEY_UNREACHABLE(); - } - - return simde__m256_from_private(r_); -} -#if defined(__clang__) && defined(__AVX512DQ__) - #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ - simde__m256 simde_mm256_cmp_ps_r; \ - switch (imm8) { \ - case SIMDE_CMP_FALSE_OQ: \ - case SIMDE_CMP_FALSE_OS: \ - simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ - break; \ - case SIMDE_CMP_TRUE_UQ: \ - case SIMDE_CMP_TRUE_US: \ - simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ - break; \ - default: \ - simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ - break; \ - } \ - simde_mm256_cmp_ps_r; \ - })) -#elif defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) -#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) - #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ - simde__m256_private \ - simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ - simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ - simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ - \ - for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ - simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ - } \ - \ - simde__m256_from_private(simde_mm256_cmp_ps_r_); \ - })) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmp_ps - #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { - simde__m256_private - r_, - dest_ = simde__m256_to_private(dest), - src_ = simde__m256_to_private(src); - - #if defined(simde_math_copysignf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #else - simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); - return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); - #endif - - return simde__m256_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { - simde__m256d_private - r_, - dest_ = simde__m256d_to_private(dest), - src_ = simde__m256d_to_private(src); - - #if defined(simde_math_copysign) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); - } - #else - simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); - return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); - #endif - - return simde__m256d_from_private(r_); -} - -HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m256d -simde_mm256_cvtepi32_pd (simde__m128i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtepi32_pd(a); - #else - simde__m256d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi32_pd - #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 - simde_mm256_cvtepi32_ps (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtepi32_ps(a); - #else - simde__m256_private r_; - simde__m256i_private a_ = simde__m256i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi32_ps - #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_cvtpd_epi32 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtpd_epi32(a); - #else - simde__m128i_private r_; - simde__m256d_private a_ = simde__m256d_to_private(a); - - #if defined(simde_math_nearbyint) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtpd_epi32 - #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_cvtpd_ps (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtpd_ps(a); - #else - simde__m128_private r_; - simde__m256d_private a_ = simde__m256d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtpd_ps - #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtps_epi32 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtps_epi32(a); - #else - simde__m256i_private r_; - simde__m256_private a_ = simde__m256_to_private(a); - - #if defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtps_epi32 - #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_cvtps_pd (simde__m128 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvtps_pd(a); - #else - simde__m256d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); - } - - return 
simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtps_pd - #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 -simde_mm256_cvtsd_f64 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MSVC_VERSION_CHECK(19,14,0)) - return _mm256_cvtsd_f64(a); - #else - simde__m256d_private a_ = simde__m256d_to_private(a); - return a_.f64[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtsd_f64 - #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm256_cvtsi256_si32 (simde__m256i a) { - #if defined(SIMDE_X86_AVX_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MSVC_VERSION_CHECK(19,14,0)) - return _mm256_cvtsi256_si32(a); - #else - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i32[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtsi256_si32 - #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 -simde_mm256_cvtss_f32 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) && ( \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ - HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - HEDLEY_MSVC_VERSION_CHECK(19,14,0)) - return _mm256_cvtss_f32(a); - #else - simde__m256_private a_ = simde__m256_to_private(a); - return a_.f32[0]; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtss_f32 - #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_cvttpd_epi32 (simde__m256d a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvttpd_epi32(a); - #else - simde__m128i_private r_; - simde__m256d_private a_ = simde__m256d_to_private(a); - - #if defined(simde_math_trunc) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvttpd_epi32 - #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvttps_epi32 (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_cvttps_epi32(a); - #else - simde__m256i_private r_; - simde__m256_private a_ = simde__m256_to_private(a); - - #if defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvttps_epi32 - #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_div_ps (simde__m256 a, simde__m256 b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_ps(a, b); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); - r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_ps - #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_div_pd (simde__m256d a, simde__m256d b) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_div_pd(a, b); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); - r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_div_pd - #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm256_extractf128_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256d_private a_ = simde__m256d_to_private(a); - return a_.m128d[imm8]; -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf128_pd - #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm256_extractf128_ps (simde__m256 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256_private a_ = simde__m256_to_private(a); - return a_.m128[imm8]; -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf128_ps - #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[imm8]; -} -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extractf128_si256 - #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_floor_pd (simde__m256d a) { - return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_floor_pd - #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_floor_ps (simde__m256 a) { - return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_floor_ps - #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 31) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i8[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi8 - #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 15) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i16[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi16 - #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 7) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i32[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insert_epi32 - #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 3) { - simde__m256i_private a_ = simde__m256i_to_private(a); - - a_.i64[index] = i; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm256_insert_epi64 - #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256d_private a_ = simde__m256d_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - - a_.m128d_private[imm8] = b_; - - return simde__m256d_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_pd - #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256_private a_ = simde__m256_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - 
a_.m128_private[imm8] = b_; - - return simde__m256_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_ps - #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - simde__m128i_private b_ = simde__m128i_to_private(b); - - a_.m128i_private[imm8] = b_; - - return simde__m256i_from_private(a_); -} -#if defined(SIMDE_X86_AVX_NATIVE) - #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_insertf128_si256 - #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX_NATIVE) -# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) -#else -# define simde_mm256_dp_ps(a, b, imm8) \ - simde_mm256_set_m128( \ - simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ - simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_dp_ps - #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t -simde_mm256_extract_epi32 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 7) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i32[index]; -} -#if defined(SIMDE_X86_AVX_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi32 - #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t -simde_mm256_extract_epi64 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 3) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i64[index]; -} -#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) - #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) - #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) - #endif -#endif -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) - #undef _mm256_extract_epi64 - #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_lddqu_si256 - #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_pd(mem_addr); - #else - simde__m256d r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); - 
return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_pd - #define _mm256_load_pd(a) simde_mm256_load_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_ps(mem_addr); - #else - simde__m256 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_ps - #define _mm256_load_ps(a) simde_mm256_load_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_load_si256 (simde__m256i const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_load_si256(mem_addr); - #else - simde__m256i r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_load_si256 - #define _mm256_load_si256(a) simde_mm256_load_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_pd(a); - #else - simde__m256d r; - simde_memcpy(&r, a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_pd - #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_ps(a); - #else - simde__m256 r; - simde_memcpy(&r, a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_ps - #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi8(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi8 - #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ - && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi16(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) -#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi16 - #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi32(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi32 - #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) -#endif - -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ - && !defined(SIMDE_BUG_CLANG_REV_344862) \ - && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_epi64(void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#endif -#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) - #undef _mm256_loadu_epi64 - #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu_si256 (void const * mem_addr) { - #if defined(SIMDE_X86_AVX_NATIVE) - return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); - #else - simde__m256i r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu_si256 - #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - return _mm256_loadu2_m128(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), - simde_mm_loadu_ps(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128 - #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
- return _mm256_loadu2_m128d(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), - simde_mm_loadu_pd(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128d - #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { - #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) - return _mm256_loadu2_m128i(hiaddr, loaddr); - #else - return - simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), - simde_mm_loadu_si128(hiaddr), 1); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm256_loadu2_m128i - #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); - #else - return _mm_maskload_pd(mem_addr, mask); - #endif - #else - simde__m128d_private r_; - simde__m128i_private - mask_ = simde__m128i_to_private(mask), - mask_shr_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde_mm_and_pd(simde_mm_load_pd(mem_addr), - simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { - mask_shr_.i64[i] = mask_.i64[i] >> 63; - } - #endif - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) - #undef _mm_maskload_pd - #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { - #if defined(SIMDE_X86_AVX_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); - #else - return _mm256_maskload_pd(mem_addr, mask); - #endif - #else - simde__m256d_private r_; - simde__m256i_private mask_ = simde__m256i_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = (mask_.i64[i] >> 63) ? 
[... deleted vendored SIMDe header, elided: the remainder of this removed hunk is SIMDe's portable fallback implementations of the x86 AVX intrinsics (simde_mm256_maskload_*/maskstore_*, min_*/max_*, movedup_pd/movehdup_ps/moveldup_ps, movemask_*, mul_*, or_*/xor_*, permute_*/permutevar_*/permute2f128_*, rcp_ps/rsqrt_ps/sqrt_*, setr_*, shuffle_*, store_*/storeu_*/stream_*, sub_*/hsub_*, undefined_*, unpackhi_*/unpacklo_*, zext*128_*256, and the testc_*/testz_*/testnzc_* predicates), the closing "End simde/x86/avx.h" include guard, and the opening of the AVX2 section (simde_mm256_abs_epi8/epi16/epi32). The entire bundled header is removed in this diff. ...]
-a_.i32[i] : a_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_abs_epi32 - #define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_add_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_add_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_add_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_add_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_epi8 - #define _mm256_add_epi8(a, b) simde_mm256_add_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_add_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_add_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_add_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_add_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_epi16 - #define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hadd_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hadd_epi16(a, b); - #else - return simde_mm256_add_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_epi16 - #define _mm256_hadd_epi16(a, b) simde_mm256_hadd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_add_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_add_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_add_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_add_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_epi32 - #define _mm256_add_epi32(a, b) simde_mm256_add_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hadd_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hadd_epi32(a, b); - #else - return simde_mm256_add_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), 
simde_x_mm256_deinterleaveodd_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadd_epi32 - #define _mm256_hadd_epi32(a, b) simde_mm256_hadd_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_add_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_add_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_add_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_add_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) - r_.i64 = a_.i64 + b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] + b_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_add_epi64 - #define _mm256_add_epi64(a, b) simde_mm256_add_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_alignr_epi8 (simde__m256i a, simde__m256i b, int count) - SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - if (HEDLEY_UNLIKELY(count > 31)) - return simde_mm256_setzero_si256(); - - for (size_t h = 0 ; h < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; h++) { - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { - const int srcpos = count + HEDLEY_STATIC_CAST(int, i); - if (srcpos > 31) { - r_.m128i_private[h].i8[i] = 0; - } else if (srcpos > 15) { - r_.m128i_private[h].i8[i] = a_.m128i_private[h].i8[(srcpos) & 15]; - } else { - r_.m128i_private[h].i8[i] = b_.m128i_private[h].i8[srcpos]; - } - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_BUG_PGI_30106) -# define simde_mm256_alignr_epi8(a, b, count) _mm256_alignr_epi8(a, b, count) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_alignr_epi8(a, b, count) \ - simde_mm256_set_m128i( \ - simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (count)), \ - simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (count))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_alignr_epi8 - #define _mm256_alignr_epi8(a, b, count) simde_mm256_alignr_epi8(a, b, (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_and_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_and_si256(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_and_si128(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_and_si128(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] & b_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_and_si256 - #define _mm256_and_si256(a, b) simde_mm256_and_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m256i -simde_mm256_andnot_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_andnot_si256(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_andnot_si128(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_andnot_si128(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_andnot_si256 - #define _mm256_andnot_si256(a, b) simde_mm256_andnot_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_adds_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_adds_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_adds_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_adds_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_adds_epi8 - #define _mm256_adds_epi8(a, b) simde_mm256_adds_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_adds_epi16(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_adds_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_adds_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_adds_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_adds_epi16 - #define _mm256_adds_epi16(a, b) simde_mm256_adds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_hadds_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_hadds_epi16(a, b); - #else - return simde_mm256_adds_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_hadds_epi16 - #define _mm256_hadds_epi16(a, b) simde_mm256_hadds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_adds_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_adds_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_adds_epu8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_adds_epu8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); - } - #endif - - return simde__m256i_from_private(r_); - 
#endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_adds_epu8 - #define _mm256_adds_epu8(a, b) simde_mm256_adds_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_adds_epu16(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_adds_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_adds_epu16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_adds_epu16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_adds_epu16 - #define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_avg_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_avg_epu8 - #define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_avg_epu16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_avg_epu16 - #define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_blend_epi32(simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm_blend_epi32(a, b, imm8) _mm_blend_epi32(a, b, imm8) -#elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) -# define simde_mm_blend_epi32(a, b, imm8) \ - simde_mm_castps_si128(simde_mm_blend_ps(simde_mm_castsi128_ps(a), simde_mm_castsi128_ps(b), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_blend_epi32 - #define _mm_blend_epi32(a, b, imm8) simde_mm_blend_epi32(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_blend_epi16(simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((imm8 >> i%8) & 1) ? 
b_.i16[i] : a_.i16[i]; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_BUG_CLANG_REV_234560) -# define simde_mm256_blend_epi16(a, b, imm8) _mm256_castpd_si256(_mm256_blend_epi16(a, b, imm8)) -#elif defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_blend_epi16(a, b, imm8) _mm256_blend_epi16(a, b, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_epi16(a, b, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8)), \ - simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_epi16 - #define _mm256_blend_epi16(a, b, imm8) simde_mm256_blend_epi16(a, b, imm8) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_blend_epi32(simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i]; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_blend_epi32(a, b, imm8) _mm256_blend_epi32(a, b, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_blend_epi32(a, b, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8) >> 4), \ - simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8) & 0x0F)) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_blend_epi32 - #define _mm256_blend_epi32(a, b, imm8) simde_mm256_blend_epi32(a, b, imm8) -#endif - - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_blendv_epi8(simde__m256i a, simde__m256i b, simde__m256i mask) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_blendv_epi8(a, b, mask); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - mask_ = simde__m256i_to_private(mask); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_blendv_epi8(a_.m128i[0], b_.m128i[0], mask_.m128i[0]); - r_.m128i[1] = simde_mm_blendv_epi8(a_.m128i[1], b_.m128i[1], mask_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - __typeof__(mask_.i8) tmp = mask_.i8 >> 7; - r_.i8 = (tmp & b_.i8) | (~tmp & a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - int8_t tmp = mask_.i8[i] >> 7; - r_.i8[i] = (tmp & b_.i8[i]) | (~tmp & a_.i8[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_blendv_epi8(a, b, imm8) _mm256_blendv_epi8(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_blendv_epi8 - #define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastb_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastb_epi8(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[0]; - } - - return 
simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastb_epi8 - #define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastb_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastb_epi8(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastb_epi8 - #define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastw_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastw_epi16(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastw_epi16 - #define _mm_broadcastw_epi16(a) simde_mm_broadcastw_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastw_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastw_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastw_epi16 - #define _mm256_broadcastw_epi16(a) simde_mm256_broadcastw_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastd_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastd_epi32(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastd_epi32 - #define _mm_broadcastd_epi32(a) simde_mm_broadcastd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastd_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastd_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastd_epi32 - #define _mm256_broadcastd_epi32(a) simde_mm256_broadcastd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_broadcastq_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastq_epi64(a); - #else - simde__m128i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[0]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastq_epi64 - #define _mm_broadcastq_epi64(a) simde_mm_broadcastq_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastq_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastq_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[0]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastq_epi64 - #define _mm256_broadcastq_epi64(a) simde_mm256_broadcastq_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_broadcastss_ps (simde__m128 a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_broadcastss_ps(a); - #elif defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_shuffle_ps(a, a, 0); - #else - simde__m128_private r_; - simde__m128_private a_= simde__m128_to_private(a); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastss_ps - #define _mm_broadcastss_ps(a) simde_mm_broadcastss_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_broadcastss_ps (simde__m128 a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastss_ps(a); - #else - simde__m256_private r_; - simde__m128_private a_= simde__m128_to_private(a); - - #if defined(SIMDE_X86_AVX_NATIVE) - __m128 tmp = _mm_permute_ps(a_.n, 0); - r_.n = _mm256_insertf128_ps(_mm256_castps128_ps256(tmp), tmp, 1); - #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) - r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 0, 0, 0, 0, 0, 0, 0); - #elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) - r_.m128[0] = r_.m128[1] = simde_mm_broadcastss_ps(simde__m128_from_private(a_)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastss_ps - #define _mm256_broadcastss_ps(a) simde_mm256_broadcastss_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_broadcastsd_pd (simde__m128d a) { - return simde_mm_movedup_pd(a); -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_broadcastsd_pd - #define _mm_broadcastsd_pd(a) simde_mm_broadcastsd_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_broadcastsd_pd (simde__m128d a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_broadcastsd_pd(a); - #else - simde__m256d_private r_; - simde__m128d_private a_= simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[0]; - } - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastsd_pd - #define _mm256_broadcastsd_pd(a) simde_mm256_broadcastsd_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_broadcastsi128_si256 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) 
- return _mm256_broadcastsi128_si256(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i_private[0] = a_; - r_.m128i_private[1] = a_; - #else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = a_.i64[1]; - r_.i64[2] = a_.i64[0]; - r_.i64[3] = a_.i64[1]; - #endif - - return simde__m256i_from_private(r_); - #endif -} -#define simde_mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_broadcastsi128_si256 - #define _mm256_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) - #undef _mm_broadcastsi128_si256 - #define _mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_bslli_epi128 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); - - SIMDE_VECTORIZE - for (int i = 0 ; i < ssize ; i++) { - const int e = i - imm8; - if(i >= (ssize/2)) { - if(e >= (ssize/2) && e < ssize) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - else{ - if(e >= 0 && e < (ssize/2)) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_bslli_epi128(a, imm8) _mm256_bslli_epi128(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_bslli_epi128 - #define _mm256_bslli_epi128(a, imm8) simde_mm256_bslli_epi128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_bsrli_epi128 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); - - SIMDE_VECTORIZE - for (int i = 0 ; i < ssize ; i++) { - const int e = i + imm8; - if(i < (ssize/2)) { - if(e >= 0 && e < (ssize/2)) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - else{ - if(e >= (ssize/2) && e < ssize) - r_.i8[i] = a_.i8[e]; - else - r_.i8[i] = 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) - #define simde_mm256_bsrli_epi128(a, imm8) _mm256_bsrli_epi128(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_bsrli_epi128 - #define _mm256_bsrli_epi128(a, imm8) simde_mm256_bsrli_epi128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi8 - #define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi16(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi16 - #define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi32 - #define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpeq_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpeq_epi64 - #define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi8 - #define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 > b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi16 - #define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi32 - #define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cmpgt_epi64(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cmpgt_epi64 - #define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi16 - #define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi32 - #define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi8_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi8_epi64 - #define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi16_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi16_epi32 - #define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi16_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi16_epi64 - #define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepi32_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepi32_epi64 - #define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi16(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi16 - #define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi32 - #define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu8_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u8[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu8_epi64 - #define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu16_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu16_epi32(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu16_epi32 - #define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu16_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu16_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if 
defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu16_epi64 - #define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_cvtepu32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_cvtepu32_epi64(a); - #else - simde__m256i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtepu32_epi64 - #define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_extract_epi8 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 31){ - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i8[index]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi8(a, index) _mm256_extract_epi8(a, index) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi8 - #define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int -simde_mm256_extract_epi16 (simde__m256i a, const int index) - SIMDE_REQUIRE_RANGE(index, 0, 15) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.i16[index]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) - #define simde_mm256_extract_epi16(a, index) _mm256_extract_epi16(a, index) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extract_epi16 - #define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_extracti128_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m256i_private a_ = simde__m256i_to_private(a); - return a_.m128i[imm8]; -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_extracti128_si256 - #define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_i32gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return 
simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_i32gather_epi32(base_addr, vindex, scale) _mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_i32gather_epi32 - #define _mm_i32gather_epi32(base_addr, vindex, scale) simde_mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_i32gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m128i_private - vindex_ = simde__m128i_to_private(vindex), - src_ = simde__m128i_to_private(src), - mask_ = simde__m128i_to_private(mask), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - if ((mask_.i32[i] >> 31) & 1) { - const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src1, sizeof(dst)); - r_.i32[i] = dst; - } - else { - r_.i32[i] = src_.i32[i]; - } - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_i32gather_epi32 - #define _mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_i32gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { - simde__m256i_private - vindex_ = simde__m256i_to_private(vindex), - r_; - const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { - const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); - int32_t dst; - simde_memcpy(&dst, src, sizeof(dst)); - r_.i32[i] = dst; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_i32gather_epi32(base_addr, vindex, scale) _mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_i32gather_epi32 - #define _mm256_i32gather_epi32(base_addr, vindex, scale) simde_mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_i32gather_epi32(simde__m256i src, const int32_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) - SIMDE_REQUIRE_CONSTANT(scale) - HEDLEY_REQUIRE_MSG((scale && scale <= 8 
[Deleted vendored code, collapsed for readability: this span is a long, uninterrupted run of removed lines from the bundled SIMDE AVX2 compatibility header. It contains only SIMDE's portable scalar fallbacks and the accompanying SIMDE_X86_AVX2_NATIVE / SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES #define blocks for the 32-/64-bit-index gather intrinsics and their masked variants (_mm/_mm256 i32gather/i64gather for epi32, epi64, ps and pd), maskload/maskstore (epi32, epi64), inserti128_si256, madd_epi16, maddubs_epi16, the max/min families (epi8, epi16, epi32, epu8, epu16, epu32), movemask_epi8, mpsadbw_epu8, mul_epi32, mul_epu32, mulhi_epi16 and the start of mulhi_epu16. No hand-written changes appear in this span.]
simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_mulhi_epu16(a, b) simde_mm256_mulhi_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mulhrs_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mulhrs_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) -# define _mm256_mulhrs_epi16(a, b) simde_mm256_mulhrs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mullo_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mullo_epi16(a, b); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mullo_epi16 - #define _mm256_mullo_epi16(a, b) simde_mm256_mullo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mullo_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_mullo_epi32(a, b); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_mullo_epi32 - #define _mm256_mullo_epi32(a, b) simde_mm256_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_x_mm256_mullo_epu32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 * b_.u32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] * b_.u32[i]; - } - #endif - - return simde__m256i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_or_si256 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_or_si256(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_or_si128(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_or_si128(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) 
; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_or_si256 - #define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_packs_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_packs_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_packs_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_packs_epi16(a_.m128i[1], b_.m128i[1]); - #else - const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/2; - const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/4; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); - r_.i8[i + quarter_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i])); - r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i])); - r_.i8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i])); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_packs_epi16 - #define _mm256_packs_epi16(a, b) simde_mm256_packs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_packs_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_packs_epi32(a, b); - #else - simde__m256i_private - r_, - v_[] = { - simde__m256i_to_private(a), - simde__m256i_to_private(b) - }; - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_packs_epi32(v_[0].m128i[0], v_[1].m128i[0]); - r_.m128i[1] = simde_mm_packs_epi32(v_[0].m128i[1], v_[1].m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int32_t v = v_[(i >> 2) & 1].i32[(i & 11) - ((i & 8) >> 1)]; - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (v > INT16_MAX) ? INT16_MAX : ((v < INT16_MIN) ? INT16_MIN : v)); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_packs_epi32 - #define _mm256_packs_epi32(a, b) simde_mm256_packs_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_packus_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_packus_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_packus_epi16(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_packus_epi16(a_.m128i[1], b_.m128i[1]); - #else - const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; - const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? 
UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i])); - r_.u8[i + quarter_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i])); - r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i])); - r_.u8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i])); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_packus_epi16 - #define _mm256_packus_epi16(a, b) simde_mm256_packus_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_packus_epi32 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_packus_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_packus_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_packus_epi32(a_.m128i[1], b_.m128i[1]); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < quarter_point ; i++) { - r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i])); - r_.u16[i + quarter_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i])); - r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i])); - r_.u16[halfway_point + i + quarter_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + i])); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_packus_epi32 - #define _mm256_packus_epi32(a, b) simde_mm256_packus_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); - r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute2x128_si256 - #define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permute4x64_epi64 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - r_.i64[0] = (imm8 & 0x02) ? a_.i64[((imm8 ) & 1)+2] : a_.i64[(imm8 ) & 1]; - r_.i64[1] = (imm8 & 0x08) ? a_.i64[((imm8 >> 2 ) & 1)+2] : a_.i64[(imm8 >> 2 ) & 1]; - r_.i64[2] = (imm8 & 0x20) ? a_.i64[((imm8 >> 4 ) & 1)+2] : a_.i64[(imm8 >> 4 ) & 1]; - r_.i64[3] = (imm8 & 0x80) ? a_.i64[((imm8 >> 6 ) & 1)+2] : a_.i64[(imm8 >> 6 ) & 1]; - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_permute4x64_epi64(a, imm8) _mm256_permute4x64_epi64(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute4x64_epi64 - #define _mm256_permute4x64_epi64(a, imm8) simde_mm256_permute4x64_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute4x64_pd (simde__m256d a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - r_.f64[0] = (imm8 & 0x02) ? a_.f64[((imm8 ) & 1)+2] : a_.f64[(imm8 ) & 1]; - r_.f64[1] = (imm8 & 0x08) ? a_.f64[((imm8 >> 2 ) & 1)+2] : a_.f64[(imm8 >> 2 ) & 1]; - r_.f64[2] = (imm8 & 0x20) ? a_.f64[((imm8 >> 4 ) & 1)+2] : a_.f64[(imm8 >> 4 ) & 1]; - r_.f64[3] = (imm8 & 0x80) ? 
a_.f64[((imm8 >> 6 ) & 1)+2] : a_.f64[(imm8 >> 6 ) & 1]; - - return simde__m256d_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_permute4x64_pd(a, imm8) _mm256_permute4x64_pd(a, imm8) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permute4x64_pd - #define _mm256_permute4x64_pd(a, imm8) simde_mm256_permute4x64_pd(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_permutevar8x32_epi32 (simde__m256i a, simde__m256i idx) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_permutevar8x32_epi32(a, idx); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - idx_ = simde__m256i_to_private(idx); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[idx_.i32[i] & 7]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar8x32_epi32 - #define _mm256_permutevar8x32_epi32(a, idx) simde_mm256_permutevar8x32_epi32(a, idx) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permutevar8x32_ps (simde__m256 a, simde__m256i idx) { - #if defined(SIMDE_X86_AVX2_NATIVE) - #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) - return _mm256_permutevar8x32_ps(a, HEDLEY_REINTERPRET_CAST(simde__m256, idx)); - #else - return _mm256_permutevar8x32_ps(a, idx); - #endif - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - simde__m256i_private - idx_ = simde__m256i_to_private(idx); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[idx_.i32[i] & 7]; - } - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_permutevar8x32_ps - #define _mm256_permutevar8x32_ps(a, idx) simde_mm256_permutevar8x32_ps(a, idx) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sad_epu8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sad_epu8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sad_epu8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sad_epu8(a_.m128i[1], b_.m128i[1]); - #else - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - uint16_t tmp = 0; - SIMDE_VECTORIZE_REDUCTION(+:tmp) - for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 4) ; j++) { - const size_t e = j + (i * 8); - tmp += (a_.u8[e] > b_.u8[e]) ? (a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); - } - r_.i64[i] = tmp; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sad_epu8 - #define _mm256_sad_epu8(a, b) simde_mm256_sad_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_shuffle_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_shuffle_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; i++) { - r_.u8[ i ] = (b_.u8[ i ] & 0x80) ? 
0 : a_.u8[(b_.u8[ i ] & 0x0f) ]; - r_.u8[i + 16] = (b_.u8[i + 16] & 0x80) ? 0 : a_.u8[(b_.u8[i + 16] & 0x0f) + 16]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_epi8 - #define _mm256_shuffle_epi8(a, b) simde_mm256_shuffle_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_shuffle_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { - r_.i32[i + 4] = a_.i32[((imm8 >> (i * 2)) & 3) + 4]; - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_shuffle_epi32(a, imm8) _mm256_shuffle_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) -# define simde_mm256_shuffle_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm256_shuffle_epi32(a, imm8) (__extension__ ({ \ - const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ - simde__m256i_from_private((simde__m256i_private) { .i32 = \ - SIMDE_SHUFFLE_VECTOR_(32, 32, \ - (simde_tmp_a_).i32, \ - (simde_tmp_a_).i32, \ - ((imm8) ) & 3, \ - ((imm8) >> 2) & 3, \ - ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4) }); })) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_shuffle_epi32 - #define _mm256_shuffle_epi32(a, imm8) simde_mm256_shuffle_epi32(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_shufflehi_epi16(a, imm8) _mm256_shufflehi_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_shufflehi_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm256_shufflehi_epi16(a, imm8) (__extension__ ({ \ - const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ - simde__m256i_from_private((simde__m256i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 32, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - 0, 1, 2, 3, \ - (((imm8) ) & 3) + 4, \ - (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4, \ - 8, 9, 10, 11, \ - ((((imm8) ) & 3) + 8 + 4), \ - ((((imm8) >> 2) & 3) + 8 + 4), \ - ((((imm8) >> 4) & 3) + 8 + 4), \ - ((((imm8) >> 6) & 3) + 8 + 4) \ - ) }); })) -#else -# define simde_mm256_shufflehi_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ - simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_shufflehi_epi16 - #define _mm256_shufflehi_epi16(a, imm8) simde_mm256_shufflehi_epi16(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_shufflelo_epi16(a, imm8) _mm256_shufflelo_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define 
simde_mm256_shufflelo_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -# define simde_mm256_shufflelo_epi16(a, imm8) (__extension__ ({ \ - const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ - simde__m256i_from_private((simde__m256i_private) { .i16 = \ - SIMDE_SHUFFLE_VECTOR_(16, 32, \ - (simde_tmp_a_).i16, \ - (simde_tmp_a_).i16, \ - (((imm8) ) & 3), \ - (((imm8) >> 2) & 3), \ - (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), \ - 4, 5, 6, 7, \ - ((((imm8) ) & 3) + 8), \ - ((((imm8) >> 2) & 3) + 8), \ - ((((imm8) >> 4) & 3) + 8), \ - ((((imm8) >> 6) & 3) + 8), \ - 12, 13, 14, 15) }); })) -#else -# define simde_mm256_shufflelo_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ - simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_shufflelo_epi16 - #define _mm256_shufflelo_epi16(a, imm8) simde_mm256_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sign_epi8 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sign_epi8(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (b_.i8[i] == INT8_C(0)) ? INT8_C(0) : (b_.i8[i] < INT8_C(0)) ? -a_.i8[i] : a_.i8[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sign_epi8 - #define _mm256_sign_epi8(a, b) simde_mm256_sign_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sign_epi16 (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sign_epi16(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (b_.i16[i] == INT16_C(0)) ? INT16_C(0) : (b_.i16[i] < INT16_C(0)) ? -a_.i16[i] : a_.i16[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sign_epi16 - #define _mm256_sign_epi16(a, b) simde_mm256_sign_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sign_epi32(simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sign_epi32(a, b); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (b_.i32[i] == INT32_C(0)) ? INT32_C(0) : (b_.i32[i] < INT32_C(0)) ? 
-a_.i32[i] : a_.i32[i]; - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sign_epi32 - #define _mm256_sign_epi32(a, b) simde_mm256_sign_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sll_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sll_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sll_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sll_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 15) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift)); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sll_epi16 - #define _mm256_sll_epi16(a, count) simde_mm256_sll_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sll_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sll_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sll_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sll_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 31) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift)); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sll_epi32 - #define _mm256_sll_epi32(a, count) simde_mm256_sll_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sll_epi64 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sll_epi64(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sll_epi64(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sll_epi64(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - if (shift > 63) - return simde_mm256_setzero_si256(); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift)); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sll_epi64 - #define _mm256_sll_epi64(a, count) simde_mm256_sll_epi64(a, count) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_epi16 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - /* Note: There is no consistency in how compilers handle values outside of - the expected range, hence the discrepancy between what we allow and what - Intel specifies. Some compilers will return 0, others seem to just mask - off everything outside of the range. */ - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_i16) / sizeof(a_.altivec_i16[0])) ; i++) { - r_.altivec_i16[i] = vec_sl(a_.altivec_i16[i], sv); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)); - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_slli_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_epi16 - #define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); - for (size_t i = 0 ; i < (sizeof(a_.altivec_i32) / sizeof(a_.altivec_i32[0])) ; i++) { - r_.altivec_i32[i] = vec_sl(a_.altivec_i32[i], sv); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_epi32(a, imm8) _mm256_slli_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_slli_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_epi32 - #define _mm256_slli_epi32(a, imm8) simde_mm256_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_epi64 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8); -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } -#endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8) -#elif 
SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_slli_epi64(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_epi64 - #define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_slli_si256 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { - const int e = HEDLEY_STATIC_CAST(int, i) - imm8; - r_.m128i_private[h].i8[i] = (e >= 0) ? a_.m128i_private[h].i8[e] : 0; - } - } - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_slli_si256(a, imm8) _mm256_slli_si256(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) -# define simde_mm256_slli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -# define simde_mm256_slli_si256(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_slli_si256 - #define _mm256_slli_si256(a, imm8) simde_mm256_slli_si256(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sllv_epi32 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vreinterpretq_s32_u32(b_.neon_u32)); - r_.neon_u32 = vandq_u32(r_.neon_u32, vcltq_u32(b_.neon_u32, vdupq_n_u32(32))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < UINT32_C(32))) & (a_.u32 << b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] << b_.u32[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_sllv_epi32(a, b) _mm_sllv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_sllv_epi32 - #define _mm_sllv_epi32(a, b) simde_mm_sllv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sllv_epi32 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sllv_epi32(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sllv_epi32(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 << b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] << b_.u32[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_sllv_epi32(a, b) _mm256_sllv_epi32(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sllv_epi32 - #define _mm256_sllv_epi32(a, b) simde_mm256_sllv_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sllv_epi64 (simde__m128i a, simde__m128i b) { - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - r_; - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vreinterpretq_s64_u64(b_.neon_u64)); - r_.neon_u64 = vandq_u64(r_.neon_u64, vcltq_u64(b_.neon_u64, vdupq_n_u64(64))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] << b_.u64[i]) : 0; - } - #endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm_sllv_epi64(a, b) _mm_sllv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_sllv_epi64 - #define _mm_sllv_epi64(a, b) simde_mm_sllv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sllv_epi64 (simde__m256i a, simde__m256i b) { - simde__m256i_private - a_ = simde__m256i_to_private(a), - b_ = simde__m256i_to_private(b), - r_; - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sllv_epi64(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_sllv_epi64(a_.m128i[1], b_.m128i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] << b_.u64[i]) : 0; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) - #define simde_mm256_sllv_epi64(a, b) _mm256_sllv_epi64(a, b) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sllv_epi64 - #define _mm256_sllv_epi64(a, b) simde_mm256_sllv_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sra_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sra_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sra_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sra_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sra_epi16 - #define _mm256_sra_epi16(a, count) simde_mm256_sra_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_sra_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_sra_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_sra_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_sra_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - - if (shift > 31) shift = 31; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_sra_epi32 - #define _mm256_sra_epi32(a, count) simde_mm256_sra_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srai_epi16 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srai_epi16(a, imm8) _mm256_srai_epi16(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srai_epi16(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srai_epi16 - #define _mm256_srai_epi16(a, imm8) simde_mm256_srai_epi16(a, imm8) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srai_epi32 (simde__m256i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); - - if (shift > 31) shift = 31; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); -} -#if defined(SIMDE_X86_AVX2_NATIVE) -# define simde_mm256_srai_epi32(a, imm8) _mm256_srai_epi32(a, imm8) -#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) -# define simde_mm256_srai_epi32(a, imm8) \ - simde_mm256_set_m128i( \ - simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ - simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) -#endif -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srai_epi32 - #define _mm256_srai_epi32(a, imm8) simde_mm256_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_srav_epi32 (simde__m128i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm_srav_epi32(a, count); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t cnt = vreinterpretq_s32_u32(vminq_u32(count_.neon_u32, vdupq_n_u32(31))); - r_.neon_i32 = vshlq_s32(a_.neon_i32, vnegq_s32(cnt)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); - r_.i32[i] = a_.i32[i] >> HEDLEY_STATIC_CAST(int, shift > 31 ? 31 : shift); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm_srav_epi32 - #define _mm_srav_epi32(a, count) simde_mm_srav_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srav_epi32 (simde__m256i a, simde__m256i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srav_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a), - count_ = simde__m256i_to_private(count); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srav_epi32(a_.m128i[0], count_.m128i[0]); - r_.m128i[1] = simde_mm_srav_epi32(a_.m128i[1], count_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); - if (shift > 31) shift = 31; - r_.i32[i] = a_.i32[i] >> shift; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srav_epi32 - #define _mm256_srav_epi32(a, count) simde_mm256_srav_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi16 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi16(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi16(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi16(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 16 ? 
16 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi16 - #define _mm256_srl_epi16(a, count) simde_mm256_srl_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi32 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi32(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi32(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi32(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 32 ? 32 : count_.i64[0])); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> (shift); - } - #endif - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) - #undef _mm256_srl_epi32 - #define _mm256_srl_epi32(a, count) simde_mm256_srl_epi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_srl_epi64 (simde__m256i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX2_NATIVE) - return _mm256_srl_epi64(a, count); - #else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) - r_.m128i[0] = simde_mm_srl_epi64(a_.m128i[0], count); - r_.m128i[1] = simde_mm_srl_epi64(a_.m128i[1], count); - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 64 ? 
[... vendored SIMDe deletion continues: the remainder of the simde/x86/avx2.h emulation layer is removed here, comprising the portable fallbacks for simde_mm256_srl_epi64; simde_mm256_srli_epi16/epi32/epi64 and simde_mm256_srli_si256; simde_mm_srlv_epi32/epi64 and simde_mm256_srlv_epi32/epi64; simde_mm256_stream_load_si256; simde_mm256_sub_epi8/epi16/epi32/epi64 and simde_mm256_hsub_epi16/epi32; simde_x_mm256_sub_epu32; simde_mm256_subs_epi8/epi16/epu8/epu16 and simde_mm256_hsubs_epi16; simde_x_mm256_test_all_ones; simde_mm256_unpacklo_/unpackhi_epi8/epi16/epi32/epi64; and simde_mm256_xor_si256, ending at the "End simde/x86/avx2.h" marker ...]
[... the vendored SIMDe XOP emulation is removed next (guarded by SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES): simde_mm_cmov_si128 and simde_mm256_cmov_si256, followed by the element-wise comparison families simde_mm_comeq/comge/comgt/comle/comlt/comneq over epi8/epi16/epi32/epi64 and epu8/epu16/epu32/epu64, each dispatching to the native XOP intrinsic when available, otherwise to NEON, GCC vector extensions, or a scalar loop that writes all-ones/all-zeros masks; the deleted hunk continues beyond this excerpt ...]
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comneq_epi32(a, b) simde_mm_comneq_epi32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comneq_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) - return _mm_com_epi64(a, b, _MM_PCOMCTRL_NEQ); - #elif defined(SIMDE_X86_XOP_NATIVE) - return _mm_comneq_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_s64(a_.neon_i64, b_.neon_i64))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 != b_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (a_.i64[i] != b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comneq_epi64(a, b) simde_mm_comneq_epi64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comneq_epu8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) - return _mm_com_epu8(a, b, _MM_PCOMCTRL_NEQ); - #elif defined(SIMDE_X86_XOP_NATIVE) - return _mm_comneq_epu8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmvnq_u8(vceqq_u8(a_.neon_u8, b_.neon_u8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 != b_.u8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] != b_.u8[i]) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comneq_epu8(a, b) simde_mm_comneq_epu8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comneq_epu16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) - return _mm_com_epu16(a, b, _MM_PCOMCTRL_NEQ); - #elif defined(SIMDE_X86_XOP_NATIVE) - return _mm_comneq_epu16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vmvnq_u16(vceqq_u16(a_.neon_u16, b_.neon_u16)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 != b_.u16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] != b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comneq_epu16(a, b) simde_mm_comneq_epu16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comneq_epu32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) - return _mm_com_epu32(a, b, _MM_PCOMCTRL_NEQ); - #elif defined(SIMDE_X86_XOP_NATIVE) - return _mm_comneq_epu32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmvnq_u32(vceqq_u32(a_.neon_u32, b_.neon_u32)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 != b_.u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] != b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comneq_epu32(a, b) simde_mm_comneq_epu32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comneq_epu64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) - return _mm_com_epu64(a, b, _MM_PCOMCTRL_NEQ); - #elif defined(SIMDE_X86_XOP_NATIVE) - return _mm_comneq_epu64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_u64(a_.neon_u64, b_.neon_u64))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 != b_.u64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] != b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comneq_epu64(a, b) simde_mm_comneq_epu64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comfalse_epi8 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_mm_setzero_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comfalse_epi8(a, b) simde_mm_comfalse_epi8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comfalse_epi16 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_mm_setzero_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comfalse_epi16(a, b) simde_mm_comfalse_epi16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comfalse_epi32 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_mm_setzero_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comfalse_epi32(a, b) simde_mm_comfalse_epi32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comfalse_epi64 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_mm_setzero_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comfalse_epi64(a, b) simde_mm_comfalse_epi64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comfalse_epu8 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_mm_setzero_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comfalse_epu8(a, b) simde_mm_comfalse_epu8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comfalse_epu16 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_mm_setzero_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comfalse_epu16(a, b) simde_mm_comfalse_epu16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comfalse_epu32 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_mm_setzero_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comfalse_epu32(a, b) simde_mm_comfalse_epu32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comfalse_epu64 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_mm_setzero_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comfalse_epu64(a, b) simde_mm_comfalse_epu64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comtrue_epi8 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_x_mm_setone_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comtrue_epi8(a, b) simde_mm_comtrue_epi8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comtrue_epi16 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_x_mm_setone_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comtrue_epi16(a, b) simde_mm_comtrue_epi16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comtrue_epi32 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_x_mm_setone_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comtrue_epi32(a, b) simde_mm_comtrue_epi32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comtrue_epi64 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return 
simde_x_mm_setone_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comtrue_epi64(a, b) simde_mm_comtrue_epi64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comtrue_epu8 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_x_mm_setone_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comtrue_epu8(a, b) simde_mm_comtrue_epu8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comtrue_epu16 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_x_mm_setone_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comtrue_epu16(a, b) simde_mm_comtrue_epu16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comtrue_epu32 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_x_mm_setone_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comtrue_epu32(a, b) simde_mm_comtrue_epu32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_comtrue_epu64 (simde__m128i a, simde__m128i b) { - (void) a; - (void) b; - return simde_x_mm_setone_si128(); -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_comtrue_epu64(a, b) simde_mm_comtrue_epu64((a), (b)) -#endif - -#if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) - #define SIMDE_X86_XOP_HAVE_COM_ 1 - #define SIMDE_MM_PCOMCTRL_LT _MM_PCOMCTRL_LT - #define SIMDE_MM_PCOMCTRL_LE _MM_PCOMCTRL_LE - #define SIMDE_MM_PCOMCTRL_GT _MM_PCOMCTRL_GT - #define SIMDE_MM_PCOMCTRL_GE _MM_PCOMCTRL_GE - #define SIMDE_MM_PCOMCTRL_EQ _MM_PCOMCTRL_EQ - #define SIMDE_MM_PCOMCTRL_NEQ _MM_PCOMCTRL_NEQ - #define SIMDE_MM_PCOMCTRL_FALSE _MM_PCOMCTRL_FALSE - #define SIMDE_MM_PCOMCTRL_TRUE _MM_PCOMCTRL_TRUE -#else - #define SIMDE_MM_PCOMCTRL_LT 0 - #define SIMDE_MM_PCOMCTRL_LE 1 - #define SIMDE_MM_PCOMCTRL_GT 2 - #define SIMDE_MM_PCOMCTRL_GE 3 - #define SIMDE_MM_PCOMCTRL_EQ 4 - #define SIMDE_MM_PCOMCTRL_NEQ 5 - #define SIMDE_MM_PCOMCTRL_FALSE 6 - #define SIMDE_MM_PCOMCTRL_TRUE 7 - - #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _MM_PCOMCTRL_LT SIMDE_MM_PCOMCTRL_LT - #define _MM_PCOMCTRL_LE SIMDE_MM_PCOMCTRL_LE - #define _MM_PCOMCTRL_GT SIMDE_MM_PCOMCTRL_GT - #define _MM_PCOMCTRL_GE SIMDE_MM_PCOMCTRL_GE - #define _MM_PCOMCTRL_EQ SIMDE_MM_PCOMCTRL_EQ - #define _MM_PCOMCTRL_NEQ SIMDE_MM_PCOMCTRL_NEQ - #define _MM_PCOMCTRL_FALSE SIMDE_MM_PCOMCTRL_FALSE - #define _MM_PCOMCTRL_TRUE SIMDE_MM_PCOMCTRL_TRUE - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_com_epi8 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - switch (imm8) { - case SIMDE_MM_PCOMCTRL_LT: - return simde_mm_comlt_epi8(a, b); - case SIMDE_MM_PCOMCTRL_LE: - return simde_mm_comle_epi8(a, b); - case SIMDE_MM_PCOMCTRL_GT: - return simde_mm_comgt_epi8(a, b); - case SIMDE_MM_PCOMCTRL_GE: - return simde_mm_comge_epi8(a, b); - case SIMDE_MM_PCOMCTRL_EQ: - return simde_mm_comeq_epi8(a, b); - case SIMDE_MM_PCOMCTRL_NEQ: - return simde_mm_comneq_epi8(a, b); - case SIMDE_MM_PCOMCTRL_FALSE: - return simde_mm_comfalse_epi8(a, b); - case SIMDE_MM_PCOMCTRL_TRUE: - return simde_mm_comtrue_epi8(a, b); - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); - } -} -#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) - #define simde_mm_com_epi8(a, b, imm8) _mm_com_epi8((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_com_epi8(a, b, imm8) 
simde_mm_com_epi8((a), (b), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_com_epi16 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - switch (imm8) { - case SIMDE_MM_PCOMCTRL_LT: - return simde_mm_comlt_epi16(a, b); - case SIMDE_MM_PCOMCTRL_LE: - return simde_mm_comle_epi16(a, b); - case SIMDE_MM_PCOMCTRL_GT: - return simde_mm_comgt_epi16(a, b); - case SIMDE_MM_PCOMCTRL_GE: - return simde_mm_comge_epi16(a, b); - case SIMDE_MM_PCOMCTRL_EQ: - return simde_mm_comeq_epi16(a, b); - case SIMDE_MM_PCOMCTRL_NEQ: - return simde_mm_comneq_epi16(a, b); - case SIMDE_MM_PCOMCTRL_FALSE: - return simde_mm_comfalse_epi16(a, b); - case SIMDE_MM_PCOMCTRL_TRUE: - return simde_mm_comtrue_epi16(a, b); - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); - } -} -#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) - #define simde_mm_com_epi16(a, b, imm8) _mm_com_epi16((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_com_epi16(a, b, imm8) simde_mm_com_epi16((a), (b), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_com_epi32 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - switch (imm8) { - case SIMDE_MM_PCOMCTRL_LT: - return simde_mm_comlt_epi32(a, b); - case SIMDE_MM_PCOMCTRL_LE: - return simde_mm_comle_epi32(a, b); - case SIMDE_MM_PCOMCTRL_GT: - return simde_mm_comgt_epi32(a, b); - case SIMDE_MM_PCOMCTRL_GE: - return simde_mm_comge_epi32(a, b); - case SIMDE_MM_PCOMCTRL_EQ: - return simde_mm_comeq_epi32(a, b); - case SIMDE_MM_PCOMCTRL_NEQ: - return simde_mm_comneq_epi32(a, b); - case SIMDE_MM_PCOMCTRL_FALSE: - return simde_mm_comfalse_epi32(a, b); - case SIMDE_MM_PCOMCTRL_TRUE: - return simde_mm_comtrue_epi32(a, b); - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); - } -} -#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) - #define simde_mm_com_epi32(a, b, imm8) _mm_com_epi32((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_com_epi32(a, b, imm8) simde_mm_com_epi32((a), (b), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_com_epi64 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - switch (imm8) { - case SIMDE_MM_PCOMCTRL_LT: - return simde_mm_comlt_epi64(a, b); - case SIMDE_MM_PCOMCTRL_LE: - return simde_mm_comle_epi64(a, b); - case SIMDE_MM_PCOMCTRL_GT: - return simde_mm_comgt_epi64(a, b); - case SIMDE_MM_PCOMCTRL_GE: - return simde_mm_comge_epi64(a, b); - case SIMDE_MM_PCOMCTRL_EQ: - return simde_mm_comeq_epi64(a, b); - case SIMDE_MM_PCOMCTRL_NEQ: - return simde_mm_comneq_epi64(a, b); - case SIMDE_MM_PCOMCTRL_FALSE: - return simde_mm_comfalse_epi64(a, b); - case SIMDE_MM_PCOMCTRL_TRUE: - return simde_mm_comtrue_epi64(a, b); - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); - } -} -#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) - #define simde_mm_com_epi64(a, b, imm8) _mm_com_epi64((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_com_epi64(a, b, imm8) simde_mm_com_epi64((a), (b), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_com_epu8 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - switch (imm8) { - case SIMDE_MM_PCOMCTRL_LT: - return simde_mm_comlt_epu8(a, b); - case SIMDE_MM_PCOMCTRL_LE: - return 
simde_mm_comle_epu8(a, b); - case SIMDE_MM_PCOMCTRL_GT: - return simde_mm_comgt_epu8(a, b); - case SIMDE_MM_PCOMCTRL_GE: - return simde_mm_comge_epu8(a, b); - case SIMDE_MM_PCOMCTRL_EQ: - return simde_mm_comeq_epu8(a, b); - case SIMDE_MM_PCOMCTRL_NEQ: - return simde_mm_comneq_epu8(a, b); - case SIMDE_MM_PCOMCTRL_FALSE: - return simde_mm_comfalse_epu8(a, b); - case SIMDE_MM_PCOMCTRL_TRUE: - return simde_mm_comtrue_epu8(a, b); - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); - } -} -#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) - #define simde_mm_com_epu8(a, b, imm8) _mm_com_epu8((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_com_epu8(a, b, imm8) simde_mm_com_epu8((a), (b), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_com_epu16 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - switch (imm8) { - case SIMDE_MM_PCOMCTRL_LT: - return simde_mm_comlt_epu16(a, b); - case SIMDE_MM_PCOMCTRL_LE: - return simde_mm_comle_epu16(a, b); - case SIMDE_MM_PCOMCTRL_GT: - return simde_mm_comgt_epu16(a, b); - case SIMDE_MM_PCOMCTRL_GE: - return simde_mm_comge_epu16(a, b); - case SIMDE_MM_PCOMCTRL_EQ: - return simde_mm_comeq_epu16(a, b); - case SIMDE_MM_PCOMCTRL_NEQ: - return simde_mm_comneq_epu16(a, b); - case SIMDE_MM_PCOMCTRL_FALSE: - return simde_mm_comfalse_epu16(a, b); - case SIMDE_MM_PCOMCTRL_TRUE: - return simde_mm_comtrue_epu16(a, b); - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); - } -} -#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) - #define simde_mm_com_epu16(a, b, imm8) _mm_com_epu16((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_com_epu16(a, b, imm8) simde_mm_com_epu16((a), (b), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_com_epu32 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - switch (imm8) { - case SIMDE_MM_PCOMCTRL_LT: - return simde_mm_comlt_epu32(a, b); - case SIMDE_MM_PCOMCTRL_LE: - return simde_mm_comle_epu32(a, b); - case SIMDE_MM_PCOMCTRL_GT: - return simde_mm_comgt_epu32(a, b); - case SIMDE_MM_PCOMCTRL_GE: - return simde_mm_comge_epu32(a, b); - case SIMDE_MM_PCOMCTRL_EQ: - return simde_mm_comeq_epu32(a, b); - case SIMDE_MM_PCOMCTRL_NEQ: - return simde_mm_comneq_epu32(a, b); - case SIMDE_MM_PCOMCTRL_FALSE: - return simde_mm_comfalse_epu32(a, b); - case SIMDE_MM_PCOMCTRL_TRUE: - return simde_mm_comtrue_epu32(a, b); - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); - } -} -#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) - #define simde_mm_com_epu32(a, b, imm8) _mm_com_epu32((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_com_epu32(a, b, imm8) simde_mm_com_epu32((a), (b), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_com_epu64 (simde__m128i a, simde__m128i b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { - switch (imm8) { - case SIMDE_MM_PCOMCTRL_LT: - return simde_mm_comlt_epu64(a, b); - case SIMDE_MM_PCOMCTRL_LE: - return simde_mm_comle_epu64(a, b); - case SIMDE_MM_PCOMCTRL_GT: - return simde_mm_comgt_epu64(a, b); - case SIMDE_MM_PCOMCTRL_GE: - return simde_mm_comge_epu64(a, b); - case SIMDE_MM_PCOMCTRL_EQ: - return simde_mm_comeq_epu64(a, b); - case SIMDE_MM_PCOMCTRL_NEQ: - return simde_mm_comneq_epu64(a, b); - case SIMDE_MM_PCOMCTRL_FALSE: - return 
simde_mm_comfalse_epu64(a, b); - case SIMDE_MM_PCOMCTRL_TRUE: - return simde_mm_comtrue_epu64(a, b); - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); - } -} -#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) - #define simde_mm_com_epu64(a, b, imm8) _mm_com_epu64((a), (b), (imm8)) -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_com_epu64(a, b, imm8) simde_mm_com_epu64((a), (b), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_frcz_ps (simde__m128 a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_frcz_ps(a); - #else - simde__m128_private - r_, - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if defined(simde_math_modff) - simde_float32 integral; - r_.f32[i] = simde_math_modff(a_.f32[i], &integral); - #else - r_.f32[i] = (a_.f32[i] / 1.0f); - #endif - } - - return simde__m128_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_frcz_ps(a) simde_mm_frcz_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_frcz_pd (simde__m128d a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_frcz_pd(a); - #else - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - #if defined(simde_math_modf) - simde_float64 integral; - r_.f64[i] = simde_math_modf(a_.f64[i], &integral); - #else - r_.f64[i] = (a_.f64[i] / 1.0f); - #endif - } - - return simde__m128d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_frcz_ps(a) simde_mm_frcz_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_frcz_ss (simde__m128 a, simde__m128 b) { - #if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_BUG_CLANG_48673) - return _mm_frcz_ss(a, b); - #else - simde__m128_private - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - #if defined(simde_math_modff) - simde_float32 integral; - a_.f32[0] = simde_math_modff(b_.f32[0], &integral); - #else - a_.f32[0] = (b_.f32[0] / 1.0f); - #endif - - return simde__m128_from_private(a_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_frcz_ss(a, b) simde_mm_frcz_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_frcz_sd (simde__m128d a, simde__m128d b) { - #if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_BUG_CLANG_48673) - return _mm_frcz_sd(a, b); - #else - simde__m128d_private - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - #if defined(simde_math_modf) - simde_float64 integral; - a_.f64[0] = simde_math_modf(b_.f64[0], &integral); - #else - a_.f64[0] = (b_.f64[0] / 1.0f); - #endif - - return simde__m128d_from_private(a_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_frcz_sd(a, b) simde_mm_frcz_sd((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_frcz_ps (simde__m256 a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm256_frcz_ps(a); - #else - simde__m256_private - r_, - a_ = simde__m256_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_frcz_ps(a_.m128[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - #if defined(simde_math_modff) - simde_float32 integral; - r_.f32[i] = simde_math_modff(a_.f32[i], &integral); - 
#else - r_.f32[i] = (a_.f32[i] / 1.0f); - #endif - } - #endif - - return simde__m256_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm256_frcz_ps(a) simde_mm256_frcz_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_frcz_pd (simde__m256d a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm256_frcz_pd(a); - #else - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_frcz_pd(a_.m128d[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - #if defined(simde_math_modf) - simde_float64 integral; - r_.f64[i] = simde_math_modf(a_.f64[i], &integral); - #else - r_.f64[i] = (a_.f64[i] / 1.0f); - #endif - } - #endif - - return simde__m256d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm256_frcz_ps(a) simde_mm256_frcz_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_haddw_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_haddw_epi8(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_maddubs_epi16(_mm_set1_epi8(INT8_C(1)), a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vpaddlq_s8(a_.neon_i8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_extadd_pairwise_i8x16(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed char) one = vec_splat_s8(1); - r_.altivec_i16 = - vec_add( - vec_mule(a_.altivec_i8, one), - vec_mulo(a_.altivec_i8, one) - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = - ((a_.i16 << 8) >> 8) + - ((a_.i16 >> 8) ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2)]) + HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2) + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_haddw_epi8(a) simde_mm_haddw_epi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_haddw_epu8 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_haddw_epu8(a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - return _mm_maddubs_epi16(a, _mm_set1_epi8(INT8_C(1))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vpaddlq_u8(a_.neon_u8); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_extadd_pairwise_u8x16(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) one = vec_splat_u8(1); - r_.altivec_u16 = - vec_add( - vec_mule(a_.altivec_u8, one), - vec_mulo(a_.altivec_u8, one) - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = - ((a_.u16 << 8) >> 8) + - ((a_.u16 >> 8) ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[(i * 2)]) + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(i * 2) + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_haddw_epu8(a) simde_mm_haddw_epu8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_haddd_epi8 (simde__m128i a) { - 
#if defined(SIMDE_X86_XOP_NATIVE) - return _mm_haddd_epi8(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vpaddlq_s16(vpaddlq_s8(a_.neon_i8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = - HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) ]) + HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) + 1]) + - HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) + 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) + 3]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_haddd_epi8(a) simde_mm_haddd_epi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_haddd_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_haddd_epi16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return _mm_madd_epi16(a, _mm_set1_epi16(INT8_C(1))); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vpaddlq_s16(a_.neon_i16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_extadd_pairwise_i16x8(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(signed short) one = vec_splat_s16(1); - r_.altivec_i32 = - vec_add( - vec_mule(a_.altivec_i16, one), - vec_mulo(a_.altivec_i16, one) - ); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = - ((a_.i32 << 16) >> 16) + - ((a_.i32 >> 16) ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2)]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_haddd_epi8(a) simde_mm_haddd_epi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_haddd_epu8 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_haddd_epu8(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vpaddlq_u16(vpaddlq_u8(a_.neon_u8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = - HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) ]) + HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) + 1]) + - HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) + 2]) + HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) + 3]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_haddd_epu8(a) simde_mm_haddd_epu8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_haddd_epu16 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_haddd_epu16(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return - _mm_add_epi32( - _mm_srli_epi32(a, 16), - _mm_and_si128(a, _mm_set1_epi32(INT32_C(0x0000ffff))) - ); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vpaddlq_u16(a_.neon_u16); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_extadd_pairwise_u16x8(a_.wasm_v128); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) one = vec_splat_u16(1); - r_.altivec_u32 = - vec_add( - vec_mule(a_.altivec_u16, one), - vec_mulo(a_.altivec_u16, one) - ); - #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = - ((a_.u32 << 16) >> 16) + - ((a_.u32 >> 16) ); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2)]) + HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2) + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_haddd_epu8(a) simde_mm_haddd_epu8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_haddq_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_haddq_epi8(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vpaddlq_s32(vpaddlq_s16(vpaddlq_s8(a_.neon_i8))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = - HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) ]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 1]) + - HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 2]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 3]) + - HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 4]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 5]) + - HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 6]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 7]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_haddq_epi8(a) simde_mm_haddq_epi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_haddq_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_haddq_epi16(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vpaddlq_s32(vpaddlq_s16(a_.neon_i16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = - HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) ]) + HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) + 1]) + - HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) + 2]) + HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) + 3]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_haddq_epi16(a) simde_mm_haddq_epi16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_haddq_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_haddq_epi32(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vpaddlq_s32(a_.neon_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) ]) + HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_haddq_epi32(a) simde_mm_haddq_epi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_haddq_epu8 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_haddq_epu8(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(a_.neon_u8))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = - HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) ]) + HEDLEY_STATIC_CAST(uint64_t, 
a_.u8[(i * 8) + 1]) + - HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 2]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 3]) + - HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 4]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 5]) + - HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 6]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 7]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_haddq_epu8(a) simde_mm_haddq_epu8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_haddq_epu16 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_haddq_epu16(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vpaddlq_u32(vpaddlq_u16(a_.neon_u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = - HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) ]) + HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) + 1]) + - HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) + 2]) + HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) + 3]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_haddq_epu16(a) simde_mm_haddq_epu16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_haddq_epu32 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_haddq_epu32(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vpaddlq_u32(a_.neon_u32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[(i * 2) ]) + HEDLEY_STATIC_CAST(uint64_t, a_.u32[(i * 2) + 1]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_haddq_epu32(a) simde_mm_haddq_epu32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsubw_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_hsubw_epi8(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[i * 2]) - HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2) + 1]); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_hsubw_epi8(a) simde_mm_hsubw_epi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsubd_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_hsubd_epi16(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = - HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) ]) - HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_hsubd_epi8(a) simde_mm_hsubd_epi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_hsubq_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_hsubq_epi32(a); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = 
HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) ]) - HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_hsubq_epi32(a) simde_mm_hsubq_epi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_macc_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_macc_epi16(a, b, c); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmlaq_s16(c_.neon_i16, a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] * b_.i16[i]) + c_.i16[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_macc_epi16(a, b, c) simde_mm_macc_epi16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_macc_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_macc_epi32(a, b, c); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmlaq_s32(c_.neon_i32, a_.neon_i32, b_.neon_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (a_.i32[i] * b_.i32[i]) + c_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_macc_epi32(a, b, c) simde_mm_macc_epi32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maccd_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_maccd_epi16(a, b, c); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int16x8_t even = vuzp1q_s16(a_.neon_i16, b_.neon_i16); - int32x4_t a_even = vmovl_s16(vget_low_s16(even)); - int32x4_t b_even = vmovl_high_s16(even); - r_.neon_i32 = vmlaq_s32(c_.neon_i32, a_even, b_even); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = (HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2])) + c_.i32[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_maccd_epi16(a, b, c) simde_mm_maccd_epi16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_macclo_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_macclo_epi32(a, b, c); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t even = vuzp1q_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i64 = vaddq_s64(vmull_s32(vget_low_s32(even), vget_high_s32(even)), c_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 0]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 
2) + 0])) + c_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_macclo_epi16(a, b, c) simde_mm_macclo_epi16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_macchi_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_macchi_epi32(a, b, c); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t even = vuzp2q_s32(a_.neon_i32, b_.neon_i32); - r_.neon_i64 = vaddq_s64(vmull_s32(vget_low_s32(even), vget_high_s32(even)), c_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = (HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 1])) + c_.i64[i]; - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_macchi_epi16(a, b, c) simde_mm_macchi_epi16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maccs_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_maccs_epi16(a, b, c); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t c_lo = vmovl_s16(vget_low_s16(c_.neon_i16)); - int32x4_t c_hi = vmovl_high_s16(c_.neon_i16); - int32x4_t lo = vmlal_s16(c_lo, vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t hi = vmlal_high_s16(c_hi, a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vcombine_s16(vqmovn_s32(lo), vqmovn_s32(hi)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); - tmp += c_.i16[i]; - if (tmp > INT16_MAX) - r_.i16[i] = INT16_MAX; - else if (tmp < INT16_MIN) - r_.i16[i] = INT16_MIN; - else - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, tmp); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_maccs_epi16(a, b, c) simde_mm_maccs_epi16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maccs_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_maccs_epi32(a, b, c); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int64x2_t c_lo = vmovl_s32(vget_low_s32(c_.neon_i32)); - int64x2_t c_hi = vmovl_high_s32(c_.neon_i32); - int64x2_t lo = vmlal_s32(c_lo, vget_low_s32(a_.neon_i32), vget_low_s32(b_.neon_i32)); - int64x2_t hi = vmlal_high_s32(c_hi, a_.neon_i32, b_.neon_i32); - r_.neon_i32 = vcombine_s32(vqmovn_s64(lo), vqmovn_s64(hi)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]); - tmp += HEDLEY_STATIC_CAST(int64_t, c_.i32[i]); - if (tmp > INT32_MAX) - r_.i32[i] = INT32_MAX; - else if (tmp < INT32_MIN) - r_.i32[i] = INT32_MIN; - else - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, tmp); - } - #endif - - return 
simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_maccs_epi32(a, b, c) simde_mm_maccs_epi32((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maccsd_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_maccsd_epi16(a, b, c); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int16x8_t even = vuzp1q_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vqaddq_s32(vmull_s16(vget_low_s16(even), vget_high_s16(even)), c_.neon_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - int32_t prod = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]); - r_.i32[i] = simde_math_adds_i32(prod, c_.i32[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_maccsd_epi16(a, b, c) simde_mm_maccsd_epi16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maccslo_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_maccslo_epi32(a, b, c); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 0]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 0]); - r_.i64[i] = simde_math_adds_i64(tmp, c_.i64[i]); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_maccslo_epi16(a, b, c) simde_mm_maccslo_epi16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maccshi_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_maccshi_epi32(a, b, c); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 1]); - r_.i64[i] = simde_math_adds_i64(tmp, c_.i64[i]); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_maccshi_epi16(a, b, c) simde_mm_maccshi_epi16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maddd_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_maddd_epi16(a, b, c); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = - (a_.i16[(i * 2) + 0] * b_.i16[(i * 2) + 0]) + - (a_.i16[(i * 2) + 1] * b_.i16[(i * 2) + 1]); - r_.i32[i] += c_.i32[i]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_maddd_epi16(a, b, c) simde_mm_maddd_epi16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maddsd_epi16 (simde__m128i a, 
simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_maddsd_epi16(a, b, c); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - /* The AMD64 Architecture Programmer's Manual says that "the" - * addition is saturated; I'm not sure whether that means - * the pairwise addition or the accumulate, or both. */ - r_.i32[i] = - (a_.i16[(i * 2) + 0] * b_.i16[(i * 2) + 0]) + - (a_.i16[(i * 2) + 1] * b_.i16[(i * 2) + 1]); - r_.i32[i] = simde_math_adds_i32(r_.i32[i], c_.i32[i]); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_maddsd_epi16(a, b, c) simde_mm_maddsd_epi16((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sha_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_sha_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vshlq_s8(a_.neon_i8, b_.neon_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - if (b_.i8[i] < 0) { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] >> -b_.i8[i]); - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] << b_.i8[i]); - } - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_sha_epi8(a, b) simde_mm_sha_epi8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sha_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_sha_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - if (b_.i16[i] < 0) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] >> -b_.i16[i]); - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << b_.i16[i]); - } - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_sha_epi16(a, b) simde_mm_sha_epi16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sha_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_sha_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, b_.neon_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - if (b_.i32[i] < 0) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] >> -b_.i32[i]); - } else { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << b_.i32[i]); - } - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_sha_epi32(a, b) simde_mm_sha_epi32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_sha_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_sha_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = 
simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshlq_s64(a_.neon_i64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - if (b_.i64[i] < 0) { - r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] >> -b_.i64[i]); - } else { - r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << b_.i64[i]); - } - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_sha_epi64(a, b) simde_mm_sha_epi64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shl_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_shl_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vshlq_u8(a_.neon_u8, b_.neon_i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - if (HEDLEY_UNLIKELY(b_.i8[i] < -7 || b_.i8[i] > 7)) { - r_.u8[i] = 0; - } else { - if (b_.i8[i] < 0) { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i] >> -b_.i8[i]); - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i] << b_.i8[i]); - } - } - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_shl_epi8(a, b) simde_mm_shl_epi8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shl_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_shl_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, b_.neon_i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - if (HEDLEY_UNLIKELY(b_.i16[i] < -15 || b_.i16[i] > 15)) { - r_.u16[i] = 0; - } else { - if (b_.i16[i] < 0) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] >> -b_.i16[i]); - } else { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << b_.i16[i]); - } - } - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_shl_epi16(a, b) simde_mm_shl_epi16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shl_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_shl_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, b_.neon_i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - if (HEDLEY_UNLIKELY(b_.i32[i] < -31 || b_.i32[i] > 31)) { - r_.u32[i] = 0; - } else { - if (b_.i32[i] < 0) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i] >> -b_.i32[i]); - } else { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i] << b_.i32[i]); - } - } - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_shl_epi32(a, b) simde_mm_shl_epi32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_shl_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_shl_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = 
simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, b_.neon_i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - if (HEDLEY_UNLIKELY(b_.i64[i] < -63 || b_.i64[i] > 63)) { - r_.u64[i] = 0; - } else { - if (b_.i64[i] < 0) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i] >> -b_.i64[i]); - } else { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i] << b_.i64[i]); - } - } - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_shl_epi64(a, b) simde_mm_shl_epi64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_rot_epi8 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_rot_epi8(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (b_.i8[i] < 0) ? - HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] >> -b_.i8[i]) | (a_.u8[i] << ( b_.i8[i] & 7)))) : - HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] << b_.i8[i]) | (a_.u8[i] >> (-b_.i8[i] & 7)))); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_rot_epi8(a, b) simde_mm_rot_epi8((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_rot_epi16 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_rot_epi16(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (b_.i16[i] < 0) ? - HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] >> -b_.i16[i]) | (a_.u16[i] << ( b_.i16[i] & 15)))) : - HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] << b_.i16[i]) | (a_.u16[i] >> (-b_.i16[i] & 15)))); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_rot_epi16(a, b) simde_mm_rot_epi16((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_rot_epi32 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_rot_epi32(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (b_.i32[i] < 0) ? - HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] >> -b_.i32[i]) | (a_.u32[i] << ( b_.i32[i] & 31)))) : - HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] << b_.i32[i]) | (a_.u32[i] >> (-b_.i32[i] & 31)))); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_rot_epi32(a, b) simde_mm_rot_epi32((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_rot_epi64 (simde__m128i a, simde__m128i b) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_rot_epi64(a, b); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (b_.i64[i] < 0) ? 
- HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] >> -b_.i64[i]) | (a_.u64[i] << ( b_.i64[i] & 63)))) : - HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] << b_.i64[i]) | (a_.u64[i] >> (-b_.i64[i] & 63)))); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_rot_epi64(a, b) simde_mm_rot_epi64((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_roti_epi8 (simde__m128i a, const int count) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (count < 0) ? - HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] >> -count) | (a_.u8[i] << ( count & 7)))) : - HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] << count) | (a_.u8[i] >> (-count & 7)))); - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_XOP_NATIVE) - #define simde_mm_roti_epi8(a, count) _mm_roti_epi8((a), (count)) -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_roti_epi8(a, b) simde_mm_roti_epi8((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_roti_epi16 (simde__m128i a, const int count) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (count < 0) ? - HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] >> -count) | (a_.u16[i] << ( count & 15)))) : - HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] << count) | (a_.u16[i] >> (-count & 15)))); - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_XOP_NATIVE) - #define simde_mm_roti_epi16(a, count) _mm_roti_epi16((a), (count)) -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_roti_epi16(a, count) simde_mm_roti_epi16((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_roti_epi32 (simde__m128i a, const int count) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (count < 0) ? - HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] >> -count) | (a_.u32[i] << ( count & 31)))) : - HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] << count) | (a_.u32[i] >> (-count & 31)))); - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_XOP_NATIVE) - #define simde_mm_roti_epi32(a, count) _mm_roti_epi32((a), (count)) -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_roti_epi32(a, count) simde_mm_roti_epi32((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_roti_epi64 (simde__m128i a, const int count) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (count < 0) ? 
- HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] >> -count) | (a_.u64[i] << ( count & 63)))) : - HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] << count) | (a_.u64[i] >> (-count & 63)))); - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_XOP_NATIVE) - #define simde_mm_roti_epi64(a, count) _mm_roti_epi64((a), (count)) -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_roti_epi64(a, count) simde_mm_roti_epi64((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_perm_epi8 (simde__m128i a, simde__m128i b, simde__m128i c) { - #if defined(SIMDE_X86_XOP_NATIVE) - return _mm_perm_epi8(a, b, c); - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b), - c_ = simde__m128i_to_private(c); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - int8_t src = (c_.u8[i] & 0x10) ? b_.i8[c_.u8[i] & 0xf] : a_.i8[c_.u8[i] & 0xf]; - - switch (c_.u8[i] & 0xc0) { - case 0x40: - #if HEDLEY_HAS_BUILTIN(__builtin_bitreverse8) && !defined(HEDLEY_IBM_VERSION) - src = HEDLEY_STATIC_CAST(int8_t, __builtin_bitreverse8(HEDLEY_STATIC_CAST(uint8_t, src))); - #else - src = HEDLEY_STATIC_CAST(int8_t, ((HEDLEY_STATIC_CAST(uint8_t, src) * UINT64_C(0x80200802)) & UINT64_C(0x0884422110)) * UINT64_C(0x0101010101) >> 32); - #endif - break; - case 0x80: - src = 0; - break; - case 0xc0: - src >>= 7; - break; - } - - r_.i8[i] = (c_.u8[i] & 0x20) ? ~src : src; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_perm_epi8(a, b, c) simde_mm_perm_epi8((a), (b), (c)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_permute2_ps (simde__m128 a, simde__m128 b, simde__m128i c, const int imm8) { - simde__m128_private - r_, - a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - simde__m128i_private c_ = simde__m128i_to_private(c); - - const int m2z = imm8 & 0x03; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - const int sel = c_.i32[i] & 0x07; - const int m = c_.i32[i] & 0x08; - - switch (m | m2z) { - case 0xa: - case 0x3: - r_.i32[i] = 0; - break; - default: - r_.i32[i] = (sel > 3) ? b_.i32[sel - 4] : a_.i32[sel]; - break; - } - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_XOP_NATIVE) - #if defined(HEDLEY_MCST_LCC_VERSION) - #define simde_mm_permute2_ps(a, b, c, imm8) (__extension__ ({ \ - SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ - _mm_permute2_ps((a), (b), (c), (imm8)); \ - SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ - })) - #else - #define simde_mm_permute2_ps(a, b, c, imm8) _mm_permute2_ps((a), (b), (c), (imm8)) - #endif -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_permute2_ps(a, b, c, imm8) simde_mm_permute2_ps((a), (b), (c), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_permute2_pd (simde__m128d a, simde__m128d b, simde__m128i c, const int imm8) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - simde__m128i_private c_ = simde__m128i_to_private(c); - - const int m2z = imm8 & 0x03; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - const int sel = (c_.i64[i] & 0x06) >> 1; - const int m = c_.i64[i] & 0x08; - - switch (m | m2z) { - case 0x0a: - case 0x03: - r_.i64[i] = 0; - break; - default: - r_.i64[i] = (sel > 1) ? 
b_.i64[sel - 2] : a_.i64[sel]; - break; - } - } - - return simde__m128d_from_private(r_); -} - -#if defined(SIMDE_X86_XOP_NATIVE) - #if defined(HEDLEY_MCST_LCC_VERSION) - #define simde_mm_permute2_pd(a, b, c, imm8) (__extension__ ({ \ - SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ - _mm_permute2_pd((a), (b), (c), (imm8)); \ - SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ - })) - #else - #define simde_mm_permute2_pd(a, b, c, imm8) _mm_permute2_pd((a), (b), (c), (imm8)) - #endif -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm_permute2_pd(a, b, c, imm8) simde_mm_permute2_pd((a), (b), (c), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_permute2_ps (simde__m256 a, simde__m256 b, simde__m256i c, const int imm8) { - simde__m256_private - r_, - a_ = simde__m256_to_private(a), - b_ = simde__m256_to_private(b); - simde__m256i_private c_ = simde__m256i_to_private(c); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_mm_permute2_ps(a_.m128[i], b_.m128[i], c_.m128i[i], imm8); - } - #else - const int m2z = imm8 & 0x03; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - const int sel = c_.i32[i] & 0x07; - const int m = c_.i32[i] & 0x08; - - switch (m | m2z) { - case 0xa: - case 0x3: - r_.i32[i] = 0; - break; - default: - r_.i32[i] = (sel > 3) ? b_.i32[sel + (HEDLEY_STATIC_CAST(int, i) & 4) - 4] : a_.i32[sel + (HEDLEY_STATIC_CAST(int, i) & 4)]; - break; - } - } - #endif - - return simde__m256_from_private(r_); -} - -#if defined(SIMDE_X86_XOP_NATIVE) - #if defined(HEDLEY_MCST_LCC_VERSION) - #define simde_mm256_permute2_ps(a, b, c, imm8) (__extension__ ({ \ - SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ - _mm256_permute2_ps((a), (b), (c), (imm8)); \ - SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ - })) - #else - #define simde_mm256_permute2_ps(a, b, c, imm8) _mm256_permute2_ps((a), (b), (c), (imm8)) - #endif -#endif -#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) - #define _mm256_permute2_ps(a, b, c, imm8) simde_mm256_permute2_ps((a), (b), (c), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_permute2_pd (simde__m256d a, simde__m256d b, simde__m256i c, const int imm8) { - simde__m256d_private - r_, - a_ = simde__m256d_to_private(a), - b_ = simde__m256d_to_private(b); - simde__m256i_private c_ = simde__m256i_to_private(c); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_mm_permute2_pd(a_.m128d[i], b_.m128d[i], c_.m128i[i], imm8); - } - #else - const int m2z = imm8 & 0x03; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - const int sel = (c_.i64[i] & 0x06) >> 1; - const int m = c_.i64[i] & 0x08; - - switch (m | m2z) { - case 0x0a: - case 0x03: - r_.i64[i] = 0; - break; - default: - r_.i64[i] = (sel > 1) ? 
b_.i64[sel + (HEDLEY_STATIC_CAST(int, i) & 2) - 2] : a_.i64[sel + (HEDLEY_STATIC_CAST(int, i) & 2)];
-          break;
-      }
-    }
-  #endif
-
-  return simde__m256d_from_private(r_);
-}
-#if defined(SIMDE_X86_XOP_NATIVE)
-  #if defined(HEDLEY_MCST_LCC_VERSION)
-    #define simde_mm256_permute2_pd(a, b, c, imm8) (__extension__ ({ \
-      SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \
-      _mm256_permute2_pd((a), (b), (c), (imm8)); \
-      SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \
-    }))
-  #else
-    #define simde_mm256_permute2_pd(a, b, c, imm8) simde_undeprecated_mm256_permute2_pd((a), (b), (c), (imm8))
-  #endif
-#endif
-#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)
-  #define _mm256_permute2_pd(a, b, c, imm8) simde_mm256_permute2_pd((a), (b), (c), (imm8))
-#endif
-
-HEDLEY_DIAGNOSTIC_POP
-SIMDE_END_DECLS_
-
-#endif /* !defined(SIMDE_X86_XOP_H) */
-/* :: End simde/x86/xop.h :: */
diff --git a/src/sse_mathfun.h b/src/sse_mathfun.h
deleted file mode 100644
index ce2507b11..000000000
--- a/src/sse_mathfun.h
+++ /dev/null
@@ -1,719 +0,0 @@
-/* SIMD (SSE1+MMX or SSE2) implementation of sin, cos, exp and log
-
-   Inspired by Intel Approximate Math library, and based on the
-   corresponding algorithms of the cephes math library
-
-   The default is to use the SSE1 version. If you define USE_SSE2 the
-   the SSE2 intrinsics will be used in place of the MMX intrinsics. Do
-   not expect any significant performance improvement with SSE2.
-*/
-
-/* Copyright (C) 2007 Julien Pommier
-
-  This software is provided 'as-is', without any express or implied
-  warranty. In no event will the authors be held liable for any damages
-  arising from the use of this software.
-
-  Permission is granted to anyone to use this software for any purpose,
-  including commercial applications, and to alter it and redistribute it
-  freely, subject to the following restrictions:
-
-  1. The origin of this software must not be misrepresented; you must not
-     claim that you wrote the original software. If you use this software
-     in a product, an acknowledgment in the product documentation would be
-     appreciated but is not required.
-  2. Altered source versions must be plainly marked as such, and must not be
-     misrepresented as being the original software.
-  3. This notice may not be removed or altered from any source distribution.
-
-  (this is the zlib license)
-*/
-
-#ifdef __arm64__
-#include "simde/x86/sse.h"
-#else
-#include <xmmintrin.h>
-#endif
-
-/* yes I know, the top of this file is quite ugly */
-
-#ifdef _MSC_VER /* visual c++ */
-# define ALIGN16_BEG __declspec(align(16))
-# define ALIGN16_END
-#else /* gcc or icc */
-# define ALIGN16_BEG
-# define ALIGN16_END __attribute__((aligned(16)))
-#endif
-
-/* __m128 is ugly to write */
-typedef __m128 v4sf;  // vector of 4 float (sse1)
-
-#ifdef USE_SSE2
-#ifdef __arm64__
-# include "simde/x86/sse2.h"
-#else
-#include <emmintrin.h>
-#endif
-typedef __m128i v4si; // vector of 4 int (sse2)
-#else
-typedef __m64 v2si; // vector of 2 int (mmx)
-#endif
-
-/* declare some SSE constants -- why can't I figure a better way to do that?
*/ -#define _PS_CONST(Name, Val) \ - static const ALIGN16_BEG float _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val } -#define _PI32_CONST(Name, Val) \ - static const ALIGN16_BEG int _pi32_##Name[4] ALIGN16_END = { Val, Val, Val, Val } -#define _PS_CONST_TYPE(Name, Type, Val) \ - static const ALIGN16_BEG Type _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val } - -_PS_CONST(1 , 1.0f); -_PS_CONST(0p5, 0.5f); -/* the smallest non denormalized float number */ -_PS_CONST_TYPE(min_norm_pos, int, 0x00800000); -_PS_CONST_TYPE(mant_mask, int, 0x7f800000); -_PS_CONST_TYPE(inv_mant_mask, int, ~0x7f800000); - -_PS_CONST_TYPE(sign_mask, int, (int)0x80000000); -_PS_CONST_TYPE(inv_sign_mask, int, ~0x80000000); - -_PI32_CONST(1, 1); -_PI32_CONST(inv1, ~1); -_PI32_CONST(2, 2); -_PI32_CONST(4, 4); -_PI32_CONST(0x7f, 0x7f); - -_PS_CONST(cephes_SQRTHF, 0.707106781186547524); -_PS_CONST(cephes_log_p0, 7.0376836292E-2); -_PS_CONST(cephes_log_p1, - 1.1514610310E-1); -_PS_CONST(cephes_log_p2, 1.1676998740E-1); -_PS_CONST(cephes_log_p3, - 1.2420140846E-1); -_PS_CONST(cephes_log_p4, + 1.4249322787E-1); -_PS_CONST(cephes_log_p5, - 1.6668057665E-1); -_PS_CONST(cephes_log_p6, + 2.0000714765E-1); -_PS_CONST(cephes_log_p7, - 2.4999993993E-1); -_PS_CONST(cephes_log_p8, + 3.3333331174E-1); -_PS_CONST(cephes_log_q1, -2.12194440e-4); -_PS_CONST(cephes_log_q2, 0.693359375); - -#ifndef USE_SSE2 -typedef union xmm_mm_union { - __m128 xmm; - __m64 mm[2]; -} xmm_mm_union; - -#define COPY_XMM_TO_MM(xmm_, mm0_, mm1_) { \ - xmm_mm_union u; u.xmm = xmm_; \ - mm0_ = u.mm[0]; \ - mm1_ = u.mm[1]; \ -} - -#define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) { \ - xmm_mm_union u; u.mm[0]=mm0_; u.mm[1]=mm1_; xmm_ = u.xmm; \ - } - -#endif // USE_SSE2 - -/* natural logarithm computed for 4 simultaneous float - return NaN for x <= 0 -*/ -v4sf log_ps(v4sf x) { -#ifdef USE_SSE2 - v4si emm0; -#else - v2si mm0, mm1; -#endif - v4sf one = *(v4sf*)_ps_1; - - v4sf invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps()); - - x = _mm_max_ps(x, *(v4sf*)_ps_min_norm_pos); /* cut off denormalized stuff */ - -#ifndef USE_SSE2 - /* part 1: x = frexpf(x, &e); */ - COPY_XMM_TO_MM(x, mm0, mm1); - mm0 = _mm_srli_pi32(mm0, 23); - mm1 = _mm_srli_pi32(mm1, 23); -#else - emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23); -#endif - /* keep only the fractional part */ - x = _mm_and_ps(x, *(v4sf*)_ps_inv_mant_mask); - x = _mm_or_ps(x, *(v4sf*)_ps_0p5); - -#ifndef USE_SSE2 - /* now e=mm0:mm1 contain the really base-2 exponent */ - mm0 = _mm_sub_pi32(mm0, *(v2si*)_pi32_0x7f); - mm1 = _mm_sub_pi32(mm1, *(v2si*)_pi32_0x7f); - v4sf e = _mm_cvtpi32x2_ps(mm0, mm1); - _mm_empty(); /* bye bye mmx */ -#else - emm0 = _mm_sub_epi32(emm0, *(v4si*)_pi32_0x7f); - v4sf e = _mm_cvtepi32_ps(emm0); -#endif - - e = _mm_add_ps(e, one); - - /* part2: - if( x < SQRTHF ) { - e -= 1; - x = x + x - 1.0; - } else { x = x - 1.0; } - */ - v4sf mask = _mm_cmplt_ps(x, *(v4sf*)_ps_cephes_SQRTHF); - v4sf tmp = _mm_and_ps(x, mask); - x = _mm_sub_ps(x, one); - e = _mm_sub_ps(e, _mm_and_ps(one, mask)); - x = _mm_add_ps(x, tmp); - - - v4sf z = _mm_mul_ps(x,x); - - v4sf y = *(v4sf*)_ps_cephes_log_p0; - y = _mm_mul_ps(y, x); - y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p1); - y = _mm_mul_ps(y, x); - y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p2); - y = _mm_mul_ps(y, x); - y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p3); - y = _mm_mul_ps(y, x); - y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p4); - y = _mm_mul_ps(y, x); - y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p5); - y = _mm_mul_ps(y, x); - y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p6); - 
y = _mm_mul_ps(y, x); - y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p7); - y = _mm_mul_ps(y, x); - y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p8); - y = _mm_mul_ps(y, x); - - y = _mm_mul_ps(y, z); - - - tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q1); - y = _mm_add_ps(y, tmp); - - - tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5); - y = _mm_sub_ps(y, tmp); - - tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q2); - x = _mm_add_ps(x, y); - x = _mm_add_ps(x, tmp); - x = _mm_or_ps(x, invalid_mask); // negative arg will be NAN - return x; -} - -_PS_CONST(exp_hi, 88.3762626647949f); -_PS_CONST(exp_lo, -88.3762626647949f); - -_PS_CONST(cephes_LOG2EF, 1.44269504088896341); -_PS_CONST(cephes_exp_C1, 0.693359375); -_PS_CONST(cephes_exp_C2, -2.12194440e-4); - -_PS_CONST(cephes_exp_p0, 1.9875691500E-4); -_PS_CONST(cephes_exp_p1, 1.3981999507E-3); -_PS_CONST(cephes_exp_p2, 8.3334519073E-3); -_PS_CONST(cephes_exp_p3, 4.1665795894E-2); -_PS_CONST(cephes_exp_p4, 1.6666665459E-1); -_PS_CONST(cephes_exp_p5, 5.0000001201E-1); - -v4sf exp_ps(v4sf x) { - v4sf tmp = _mm_setzero_ps(), fx; -#ifdef USE_SSE2 - v4si emm0; -#else - v2si mm0, mm1; -#endif - v4sf one = *(v4sf*)_ps_1; - - x = _mm_min_ps(x, *(v4sf*)_ps_exp_hi); - x = _mm_max_ps(x, *(v4sf*)_ps_exp_lo); - - /* express exp(x) as exp(g + n*log(2)) */ - fx = _mm_mul_ps(x, *(v4sf*)_ps_cephes_LOG2EF); - fx = _mm_add_ps(fx, *(v4sf*)_ps_0p5); - - /* how to perform a floorf with SSE: just below */ -#ifndef USE_SSE2 - /* step 1 : cast to int */ - tmp = _mm_movehl_ps(tmp, fx); - mm0 = _mm_cvttps_pi32(fx); - mm1 = _mm_cvttps_pi32(tmp); - /* step 2 : cast back to float */ - tmp = _mm_cvtpi32x2_ps(mm0, mm1); -#else - emm0 = _mm_cvttps_epi32(fx); - tmp = _mm_cvtepi32_ps(emm0); -#endif - /* if greater, substract 1 */ - v4sf mask = _mm_cmpgt_ps(tmp, fx); - mask = _mm_and_ps(mask, one); - fx = _mm_sub_ps(tmp, mask); - - tmp = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C1); - v4sf z = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C2); - x = _mm_sub_ps(x, tmp); - x = _mm_sub_ps(x, z); - - z = _mm_mul_ps(x,x); - - v4sf y = *(v4sf*)_ps_cephes_exp_p0; - y = _mm_mul_ps(y, x); - y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p1); - y = _mm_mul_ps(y, x); - y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p2); - y = _mm_mul_ps(y, x); - y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p3); - y = _mm_mul_ps(y, x); - y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p4); - y = _mm_mul_ps(y, x); - y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p5); - y = _mm_mul_ps(y, z); - y = _mm_add_ps(y, x); - y = _mm_add_ps(y, one); - - /* build 2^n */ -#ifndef USE_SSE2 - z = _mm_movehl_ps(z, fx); - mm0 = _mm_cvttps_pi32(fx); - mm1 = _mm_cvttps_pi32(z); - mm0 = _mm_add_pi32(mm0, *(v2si*)_pi32_0x7f); - mm1 = _mm_add_pi32(mm1, *(v2si*)_pi32_0x7f); - mm0 = _mm_slli_pi32(mm0, 23); - mm1 = _mm_slli_pi32(mm1, 23); - - v4sf pow2n; - COPY_MM_TO_XMM(mm0, mm1, pow2n); - _mm_empty(); -#else - emm0 = _mm_cvttps_epi32(fx); - emm0 = _mm_add_epi32(emm0, *(v4si*)_pi32_0x7f); - emm0 = _mm_slli_epi32(emm0, 23); - v4sf pow2n = _mm_castsi128_ps(emm0); -#endif - y = _mm_mul_ps(y, pow2n); - return y; -} - -_PS_CONST(minus_cephes_DP1, -0.78515625); -_PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4); -_PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8); -_PS_CONST(sincof_p0, -1.9515295891E-4); -_PS_CONST(sincof_p1, 8.3321608736E-3); -_PS_CONST(sincof_p2, -1.6666654611E-1); -_PS_CONST(coscof_p0, 2.443315711809948E-005); -_PS_CONST(coscof_p1, -1.388731625493765E-003); -_PS_CONST(coscof_p2, 4.166664568298827E-002); -_PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI - - -/* evaluation 
of 4 sines at onces, using only SSE1+MMX intrinsics so
-   it runs also on old athlons XPs and the pentium III of your grand
-   mother.
-
-   The code is the exact rewriting of the cephes sinf function.
-   Precision is excellent as long as x < 8192 (I did not bother to
-   take into account the special handling they have for greater values
-   -- it does not return garbage for arguments over 8192, though, but
-   the extra precision is missing).
-
-   Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
-   surprising but correct result.
-
-   Performance is also surprisingly good, 1.33 times faster than the
-   macos vsinf SSE2 function, and 1.5 times faster than the
-   __vrs4_sinf of amd's ACML (which is only available in 64 bits). Not
-   too bad for an SSE1 function (with no special tuning) !
-   However the latter libraries probably have a much better handling of NaN,
-   Inf, denormalized and other special arguments..
-
-   On my core 1 duo, the execution of this function takes approximately 95 cycles.
-
-   From what I have observed on the experiments with Intel AMath lib, switching to an
-   SSE2 version would improve the perf by only 10%.
-
-   Since it is based on SSE intrinsics, it has to be compiled at -O2 to
-   deliver full speed.
-*/
-v4sf sin_ps(v4sf x) { // any x
-  v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
-
-#ifdef USE_SSE2
-  v4si emm0, emm2;
-#else
-  v2si mm0, mm1, mm2, mm3;
-#endif
-  sign_bit = x;
-  /* take the absolute value */
-  x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
-  /* extract the sign bit (upper one) */
-  sign_bit = _mm_and_ps(sign_bit, *(v4sf*)_ps_sign_mask);
-
-  /* scale by 4/Pi */
-  y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
-
-#ifdef USE_SSE2
-  /* store the integer part of y in mm0 */
-  emm2 = _mm_cvttps_epi32(y);
-  /* j=(j+1) & (~1) (see the cephes sources) */
-  emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
-  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
-  y = _mm_cvtepi32_ps(emm2);
-
-  /* get the swap sign flag */
-  emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
-  emm0 = _mm_slli_epi32(emm0, 29);
-  /* get the polynom selection mask
-     there is one polynom for 0 <= x <= Pi/4
-     and another one for Pi/4